lib/lzmadecode: Allow for 8 byte reads on 64bit
This adds an optimization to lzma decode to also read from the boot
medium in chunks of 8 bytes if that is the general purpose register
length instead of always 4 bytes. It depends on the cache / memory /
spi controller whether this is faster, but it's likely to be either the
same or faster.

TESTED
- google/vilboz: cached boot medium
                   64bit before - 32bit  - 64bit after
  load FSP-M:      35,674       - 35,595 - 34,690
  load ramstage:   42,134       - 43,378 - 40,882
  load FSP-S:      24,954       - 25,496 - 24,368
- foxconn/g41m: uncached boot medium for testing
                   64bit before - 32bit  - 64bit after
  load ramstage:   51,164       - 51,872 - 51,894

Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/70175
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Lean Sheng Tan <sheng.tan@9elements.com>
Reviewed-by: Julius Werner <jwerner@chromium.org>
This commit is contained in:
parent
f317068fc3
commit
7552eb210c
@ -35,15 +35,15 @@
|
||||
#define kBitModelTotal (1 << kNumBitModelTotalBits)
|
||||
#define kNumMoveBits 5
|
||||
|
||||
/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back
|
||||
* to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim
|
||||
/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back
|
||||
* to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim
|
||||
* is *reached* (not surpassed!), meaning we can't allow that to happen while
|
||||
* there are still bytes to decode from the algorithm's point of view. */
|
||||
#define RC_READ_BYTE \
|
||||
(look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
|
||||
: ((((uintptr_t) Buffer & 3) \
|
||||
|| ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \
|
||||
: ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \
|
||||
(look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \
|
||||
: ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \
|
||||
|| ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \
|
||||
: ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \
|
||||
(look_ahead_ptr = 1), look_ahead.raw[0])))
|
||||
|
||||
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
|
||||
@ -207,10 +207,10 @@ int LzmaDecode(CLzmaDecoderState *vs,
|
||||
int len = 0;
|
||||
const Byte *Buffer;
|
||||
const Byte *BufferLim;
|
||||
int look_ahead_ptr = 4;
|
||||
int look_ahead_ptr = sizeof(SizeT);
|
||||
union {
|
||||
Byte raw[4];
|
||||
UInt32 dw;
|
||||
Byte raw[sizeof(SizeT)];
|
||||
SizeT dw;
|
||||
} look_ahead;
|
||||
UInt32 Range;
|
||||
UInt32 Code;
|
||||
|
@ -22,10 +22,12 @@
|
||||
#ifndef __LZMADECODE_H
|
||||
#define __LZMADECODE_H
|
||||
|
||||
#include <types.h>
|
||||
|
||||
typedef unsigned char Byte;
|
||||
typedef unsigned short UInt16;
|
||||
typedef unsigned int UInt32;
|
||||
typedef UInt32 SizeT;
|
||||
typedef size_t SizeT;
|
||||
|
||||
#define CProb UInt16
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user