BaseTools LzmaCompress: Update LZMA to new 16.04 version

The new version of the LZMA SDK improves compression performance on Windows
and does not change the compression ratio. When compressing an 8M FVMAIN image,
the compression time is reduced from 2.590s to 1.419s.

Cc: Yonghong Zhu <yonghong.zhu@intel.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Liming Gao <liming.gao@intel.com>
Reviewed-by: Yonghong Zhu <yonghong.zhu@intel.com>
This commit is contained in:
Liming Gao
2016-10-27 14:50:54 +08:00
parent 756a514aa4
commit c4ab09ef2f
30 changed files with 2078 additions and 1420 deletions

View File

@ -1,5 +1,7 @@
/* LzmaDec.c -- LZMA Decoder
2008-11-06 : Igor Pavlov : Public domain */
2016-05-16 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "LzmaDec.h"
@ -44,6 +46,13 @@
i -= 0x40; }
#endif
/* One GET_BIT step on the slot prob + symbol. Repeated eight times to unroll
   the plain literal loop when _LZMA_SIZE_OPT is not defined. */
#define NORMAL_LITER_DEC GET_BIT(prob + symbol, symbol)
/* One step of literal decoding that uses the match byte as extra context:
   shifts matchByte left by one, masks it with offs to obtain bit, selects the
   slot prob + offs + bit + symbol, and passes GET_BIT2 two alternative updates
   of offs (offs &= ~bit and offs &= bit).
   The expansion site must declare matchByte, offs, bit, probLit, prob and symbol. */
#define MATCHED_LITER_DEC \
matchByte <<= 1; \
bit = (matchByte & offs); \
probLit = prob + offs + bit + symbol; \
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
@ -105,19 +114,13 @@
#define Literal (RepLenCoder + kNumLenProbs)
#define LZMA_BASE_SIZE 1846
#define LZMA_LIT_SIZE 768
#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
#define LZMA_LIT_SIZE 0x300
#if Literal != LZMA_BASE_SIZE
StopCompilingDueBUG
#endif
/* State-transition lookup with kNumStates * 2 entries, indexed by the current
   state. The decoder applies it as state = kLiteralNextStates[state] after a
   literal has been written to the dictionary. */
static const Byte kLiteralNextStates[kNumStates * 2] =
{
0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10
};
#define LzmaProps_GetNumProbs(p) (Literal + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
#define LZMA_DIC_MIN (1 << 12)
@ -130,8 +133,8 @@ Out:
p->remainLen:
< kMatchSpecLenStart : normal remain
= kMatchSpecLenStart : finished
= kMatchSpecLenStart + 1 : Flush marker
= kMatchSpecLenStart + 2 : State Init Marker
= kMatchSpecLenStart + 1 : Flush marker (unused now)
= kMatchSpecLenStart + 2 : State Init Marker (unused now)
*/
static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
@ -169,39 +172,62 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
unsigned symbol;
UPDATE_0(prob);
prob = probs + Literal;
if (checkDicSize != 0 || processedPos != 0)
prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
(dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
if (processedPos != 0 || checkDicSize != 0)
prob += ((UInt32)LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
(dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
processedPos++;
if (state < kNumLitStates)
{
state -= (state < 4) ? state : 3;
symbol = 1;
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
#ifdef _LZMA_SIZE_OPT
do { NORMAL_LITER_DEC } while (symbol < 0x100);
#else
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
#endif
}
else
{
unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
unsigned offs = 0x100;
state -= (state < 10) ? 3 : 6;
symbol = 1;
#ifdef _LZMA_SIZE_OPT
do
{
unsigned bit;
CLzmaProb *probLit;
matchByte <<= 1;
bit = (matchByte & offs);
probLit = prob + offs + bit + symbol;
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
MATCHED_LITER_DEC
}
while (symbol < 0x100);
#else
{
unsigned bit;
CLzmaProb *probLit;
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
}
#endif
}
dic[dicPos++] = (Byte)symbol;
processedPos++;
state = kLiteralNextStates[state];
/* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */
dic[dicPos++] = (Byte)symbol;
continue;
}
else
{
UPDATE_1(prob);
prob = probs + IsRep + state;
@ -224,7 +250,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
IF_BIT_0(prob)
{
UPDATE_0(prob);
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
processedPos++;
state = state < kNumLitStates ? 9 : 11;
@ -265,15 +291,17 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
state = state < kNumLitStates ? 8 : 11;
prob = probs + RepLenCoder;
}
#ifdef _LZMA_SIZE_OPT
{
unsigned offset;
unsigned lim, offset;
CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen)
{
UPDATE_0(probLen);
probLen = prob + LenLow + (posState << kLenNumLowBits);
offset = 0;
limit = (1 << kLenNumLowBits);
lim = (1 << kLenNumLowBits);
}
else
{
@ -284,19 +312,55 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
UPDATE_0(probLen);
probLen = prob + LenMid + (posState << kLenNumMidBits);
offset = kLenNumLowSymbols;
limit = (1 << kLenNumMidBits);
lim = (1 << kLenNumMidBits);
}
else
{
UPDATE_1(probLen);
probLen = prob + LenHigh;
offset = kLenNumLowSymbols + kLenNumMidSymbols;
limit = (1 << kLenNumHighBits);
lim = (1 << kLenNumHighBits);
}
}
TREE_DECODE(probLen, limit, len);
TREE_DECODE(probLen, lim, len);
len += offset;
}
#else
{
CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen)
{
UPDATE_0(probLen);
probLen = prob + LenLow + (posState << kLenNumLowBits);
len = 1;
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
len -= 8;
}
else
{
UPDATE_1(probLen);
probLen = prob + LenChoice2;
IF_BIT_0(probLen)
{
UPDATE_0(probLen);
probLen = prob + LenMid + (posState << kLenNumMidBits);
len = 1;
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
}
else
{
UPDATE_1(probLen);
probLen = prob + LenHigh;
TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
len += kLenNumLowSymbols + kLenNumMidSymbols;
}
}
}
#endif
if (state >= kNumStates)
{
@ -307,7 +371,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
if (distance >= kStartPosModelIndex)
{
unsigned posSlot = (unsigned)distance;
int numDirectBits = (int)(((distance >> 1) - 1));
unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
distance = (2 | (distance & 1));
if (posSlot < kEndPosModelIndex)
{
@ -366,6 +430,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
}
}
}
rep3 = rep2;
rep2 = rep1;
rep1 = rep0;
@ -373,27 +438,39 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
if (checkDicSize == 0)
{
if (distance >= processedPos)
{
p->dicPos = dicPos;
return SZ_ERROR_DATA;
}
}
else if (distance >= checkDicSize)
{
p->dicPos = dicPos;
return SZ_ERROR_DATA;
}
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
/* state = kLiteralNextStates[state]; */
}
len += kMatchMinLen;
if (limit == dicPos)
return SZ_ERROR_DATA;
{
SizeT rem = limit - dicPos;
unsigned curLen = ((rem < len) ? (unsigned)rem : len);
SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
SizeT rem;
unsigned curLen;
SizeT pos;
if ((rem = limit - dicPos) == 0)
{
p->dicPos = dicPos;
return SZ_ERROR_DATA;
}
curLen = ((rem < len) ? (unsigned)rem : len);
pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
processedPos += curLen;
len -= curLen;
if (pos + curLen <= dicBufSize)
if (curLen <= dicBufSize - pos)
{
Byte *dest = dic + dicPos;
ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
@ -417,7 +494,9 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
}
}
while (dicPos < limit && buf < bufLimit);
NORMALIZE;
p->buf = buf;
p->range = range;
p->code = code;
@ -441,18 +520,20 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
SizeT dicPos = p->dicPos;
SizeT dicBufSize = p->dicBufSize;
unsigned len = p->remainLen;
UInt32 rep0 = p->reps[0];
if (limit - dicPos < len)
len = (unsigned)(limit - dicPos);
SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
SizeT rem = limit - dicPos;
if (rem < len)
len = (unsigned)(rem);
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
p->checkDicSize = p->prop.dicSize;
p->processedPos += len;
p->remainLen -= len;
while (len-- != 0)
while (len != 0)
{
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
len--;
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
}
p->dicPos = dicPos;
@ -470,17 +551,19 @@ static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte
if (limit - p->dicPos > rem)
limit2 = p->dicPos + rem;
}
RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
if (p->processedPos >= p->prop.dicSize)
if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
p->checkDicSize = p->prop.dicSize;
LzmaDec_WriteRem(p, limit);
}
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
if (p->remainLen > kMatchSpecLenStart)
{
p->remainLen = kMatchSpecLenStart;
}
return 0;
}
@ -497,12 +580,12 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
UInt32 range = p->range;
UInt32 code = p->code;
const Byte *bufLimit = buf + inSize;
CLzmaProb *probs = p->probs;
const CLzmaProb *probs = p->probs;
unsigned state = p->state;
ELzmaDummy res;
{
CLzmaProb *prob;
const CLzmaProb *prob;
UInt32 bound;
unsigned ttt;
unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
@ -516,9 +599,9 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
prob = probs + Literal;
if (p->checkDicSize != 0 || p->processedPos != 0)
prob += (LZMA_LIT_SIZE *
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
prob += ((UInt32)LZMA_LIT_SIZE *
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
if (state < kNumLitStates)
{
@ -528,13 +611,13 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
else
{
unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
(p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
unsigned offs = 0x100;
unsigned symbol = 1;
do
{
unsigned bit;
CLzmaProb *probLit;
const CLzmaProb *probLit;
matchByte <<= 1;
bit = (matchByte & offs);
probLit = prob + offs + bit + symbol;
@ -604,7 +687,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
}
{
unsigned limit, offset;
CLzmaProb *probLen = prob + LenChoice;
const CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0_CHECK(probLen)
{
UPDATE_0_CHECK;
@ -644,7 +727,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
if (posSlot >= kStartPosModelIndex)
{
int numDirectBits = ((posSlot >> 1) - 1);
unsigned numDirectBits = ((posSlot >> 1) - 1);
/* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
@ -683,13 +766,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
}
/* Seeds the decoder's code/range fields from a header buffer.
   p    : decoder whose code, range and needFlush fields are overwritten.
   data : buffer of at least 5 bytes; data[1]..data[4] are combined
          most-significant-byte first into p->code. data[0] is not read here. */
static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data)
{
p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
/* Start with the full 32-bit range. */
p->range = 0xFFFFFFFF;
p->needFlush = 0;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
{
p->needFlush = 1;
@ -714,8 +790,8 @@ void LzmaDec_Init(CLzmaDec *p)
static void LzmaDec_InitStateReal(CLzmaDec *p)
{
UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
UInt32 i;
SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
SizeT i;
CLzmaProb *probs = p->probs;
for (i = 0; i < numProbs; i++)
probs[i] = kBitModelTotal >> 1;
@ -737,7 +813,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
{
int checkEndMarkNow;
if (p->needFlush != 0)
if (p->needFlush)
{
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
p->tempBuf[p->tempBufSize++] = *src++;
@ -748,8 +824,13 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
}
if (p->tempBuf[0] != 0)
return SZ_ERROR_DATA;
LzmaDec_InitRc(p, p->tempBuf);
p->code =
((UInt32)p->tempBuf[1] << 24)
| ((UInt32)p->tempBuf[2] << 16)
| ((UInt32)p->tempBuf[3] << 8)
| ((UInt32)p->tempBuf[4]);
p->range = 0xFFFFFFFF;
p->needFlush = 0;
p->tempBufSize = 0;
}
@ -833,7 +914,16 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
p->buf = p->tempBuf;
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
return SZ_ERROR_DATA;
lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
{
unsigned kkk = (unsigned)(p->buf - p->tempBuf);
if (rem < kkk)
return SZ_ERROR_FAIL; /* some internal error */
rem -= kkk;
if (lookAhead < rem)
return SZ_ERROR_FAIL; /* some internal error */
lookAhead -= rem;
}
(*srcLen) += lookAhead;
src += lookAhead;
inSize -= lookAhead;
@ -888,13 +978,13 @@ SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *sr
/* Releases p->probs through alloc->Free and clears the pointer so it is not
   left dangling.
   NOTE(review): the two consecutive assignments below look like the old and
   new sides of a single changed line in this diff view (0 -> NULL) - confirm. */
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
{
alloc->Free(alloc, p->probs);
p->probs = 0;
p->probs = NULL;
}
/* Releases the dictionary buffer p->dic through alloc->Free and clears the
   pointer so it is not left dangling.
   NOTE(review): the two consecutive assignments below look like the old and
   new sides of a single changed line in this diff view (0 -> NULL) - confirm. */
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
{
alloc->Free(alloc, p->dic);
p->dic = 0;
p->dic = NULL;
}
void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
@ -932,12 +1022,12 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
{
UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
if (p->probs == 0 || numProbs != p->numProbs)
if (!p->probs || numProbs != p->numProbs)
{
LzmaDec_FreeProbs(p, alloc);
p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb));
p->numProbs = numProbs;
if (p->probs == 0)
if (!p->probs)
return SZ_ERROR_MEM;
}
return SZ_OK;
@ -958,12 +1048,22 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
SizeT dicBufSize;
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
dicBufSize = propNew.dicSize;
if (p->dic == 0 || dicBufSize != p->dicBufSize)
{
UInt32 dictSize = propNew.dicSize;
SizeT mask = ((UInt32)1 << 12) - 1;
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
dicBufSize = ((SizeT)dictSize + mask) & ~mask;
if (dicBufSize < dictSize)
dicBufSize = dictSize;
}
if (!p->dic || dicBufSize != p->dicBufSize)
{
LzmaDec_FreeDict(p, alloc);
p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize);
if (p->dic == 0)
if (!p->dic)
{
LzmaDec_FreeProbs(p, alloc);
return SZ_ERROR_MEM;
@ -980,28 +1080,21 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
{
CLzmaDec p;
SRes res;
SizeT inSize = *srcLen;
SizeT outSize = *destLen;
*srcLen = *destLen = 0;
SizeT outSize = *destLen, inSize = *srcLen;
*destLen = *srcLen = 0;
*status = LZMA_STATUS_NOT_SPECIFIED;
if (inSize < RC_INIT_SIZE)
return SZ_ERROR_INPUT_EOF;
LzmaDec_Construct(&p);
res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc);
if (res != 0)
return res;
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
p.dic = dest;
p.dicBufSize = outSize;
LzmaDec_Init(&p);
*srcLen = inSize;
res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
*destLen = p.dicPos;
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
res = SZ_ERROR_INPUT_EOF;
(*destLen) = p.dicPos;
LzmaDec_FreeProbs(&p, alloc);
return res;
}