lib/lzmadecode: Allow for 8 byte reads on 64bit

This adds an optimization to the LZMA decoder: it now reads from the boot
medium in chunks of 8 bytes when that is the general-purpose register
width, instead of always reading 4 bytes at a time. Whether this is faster
depends on the cache, memory and SPI controller, but it is likely to be
either the same speed or faster.

TESTED
- google/vilboz: cached boot medium
64bit before - 32bit - 64bit after
load FSP-M: 35,674 - 35,595 - 34,690
load ramstage: 42,134 - 43,378 - 40,882
load FSP-S: 24,954 - 25,496 - 24,368

- foxconn/g41m: uncached boot medium for testing
64bit before - 32bit - 64bit after
load ramstage: 51,164 - 51,872 - 51,894

Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/70175
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Lean Sheng Tan <sheng.tan@9elements.com>
Reviewed-by: Julius Werner <jwerner@chromium.org>
This commit is contained in:
Arthur Heymans 2022-11-30 23:24:31 +01:00 committed by Lean Sheng Tan
parent f317068fc3
commit 7552eb210c
2 changed files with 12 additions and 10 deletions

View File

@ -35,15 +35,15 @@
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back
* to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim
/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back
* to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim
* is *reached* (not surpassed!), meaning we can't allow that to happen while
* there are still bytes to decode from the algorithm's point of view. */
#define RC_READ_BYTE \
(look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
: ((((uintptr_t) Buffer & 3) \
|| ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \
: ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \
(look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \
: ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \
|| ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \
: ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \
(look_ahead_ptr = 1), look_ahead.raw[0])))
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
@ -207,10 +207,10 @@ int LzmaDecode(CLzmaDecoderState *vs,
int len = 0;
const Byte *Buffer;
const Byte *BufferLim;
int look_ahead_ptr = 4;
int look_ahead_ptr = sizeof(SizeT);
union {
Byte raw[4];
UInt32 dw;
Byte raw[sizeof(SizeT)];
SizeT dw;
} look_ahead;
UInt32 Range;
UInt32 Code;

View File

@ -22,10 +22,12 @@
#ifndef __LZMADECODE_H
#define __LZMADECODE_H
#include <types.h>
typedef unsigned char Byte;
typedef unsigned short UInt16;
typedef unsigned int UInt32;
typedef UInt32 SizeT;
typedef size_t SizeT;
#define CProb UInt16