cbfstool: Update LZMA encoder to LZMA SDK 9.12

This removes almost all C++ code (except the wrapper) Change-Id: I0f84070e3b6dc57c98d49a53150a140479b3221f Signed-off-by: Stefan Reinauer <reinauer@google.com> Reviewed-on: http://review.coreboot.org/1799 Tested-by: build bot (Jenkins) Reviewed-by: Ronald G. Minnich <rminnich@gmail.com>
2012-10-30 14:02:45 -07:00
parent a5b7067638
commit 2e200cde9a
63 changed files with 5910 additions and 6616 deletions
--- a/util/cbfstool/lzma/lzma.cc
+++ b/util/cbfstool/lzma/lzma.cc
@@ -0,0 +1,842 @@
+#include "endian.hh" /* For R64 */
+
+extern "C" {
+#include "C/LzmaDec.h"
+#include "C/LzmaEnc.h"
+}
+
+#include "lzma.hh"
+
+#include <algorithm> // min,max,swap
+#include <vector>
+#include <string>
+#include <cstring> // std::memcpy
+
+
+#include <cstdio>
+
+#include <stdint.h>
+
+/* We don't want threads */
+#ifdef linux
+#include <sched.h>
+#define ForceSwitchThread() sched_yield()
+#else
+#define ForceSwitchThread()
+#endif
+
+
+int LZMA_verbose = 0;
+
+// -fb
+unsigned LZMA_NumFastBytes = 273;
+/*from lzma.txt:
+          Set number of fast bytes - [5, 273], default: 273
+          Usually big number gives a little bit better compression ratio
+          and slower compression process.
+  from anonymous:
+This one is hard to explain... To my knowledge (please correct me if I
+am wrong), this refers to the optimal parsing algorithm. The algorithm
+tries many different combinations of matches to find the best one. If a
+match is found that is over the fb value, then it will not be optimised,
+and will just be used straight.
+This speeds up corner cases such as pic.
+*/
+
+/* apparently, 0 and 1 are valid values. 0 = fast mode */
+unsigned LZMA_AlgorithmNo  = 1;
+
+unsigned LZMA_MatchFinderCycles = 0; // default: 0
+
+// -pb
+unsigned LZMA_PosStateBits = 0; // default: 2, range: 0..4
+/*from lzma.txt:
+          pb switch is intended for periodical data
+          when period is equal 2^N.
+*/
+
+
+// -lp
+unsigned LZMA_LiteralPosStateBits = 0; // default: 0, range: 0..4
+/*from lzma.txt:
+          lp switch is intended for periodical data when period is
+          equal 2^N. For example, for 32-bit (4 bytes)
+          periodical data you can use lp=2.
+          Often it's better to set lc0, if you change lp switch.
+*/
+
+// -lc
+unsigned LZMA_LiteralContextBits = 1; // default: 3, range: 0..8
+/*from lzma.txt:
+          Sometimes lc=4 gives gain for big files.
+  from anonymous:
+The context for the literal coder is 2^(lc) long. The longer it is, the
+better the statistics, but also the slower it adapts. A tradeoff, which
+is why 3 or 4 is reccommended.
+*/
+
+/*
+
+Discoveries:
+
+ INODES:
+    Best LZMA for raw_inotab_inode(40->48): pb0 lp0 lc0
+    Best LZMA for raw_root_inode(28->32): pb0 lp0 lc0
+
+    Start LZMA(rootdir, 736 bytes)
+    Yay result with pb0 lp0 lc0: 218
+    Yay result with pb0 lp0 lc1: 217
+    Best LZMA for rootdir(736->217): pb0 lp0 lc1
+
+    Start LZMA(inotab, 379112 bytes)
+    Yay result with pb0 lp0 lc0: 24504
+    Best LZMA for inotab(379112->24504): pb0 lp0 lc0
+
+ BLKTAB:
+    Best LZMA for raw_blktab(10068->2940): pb2 lp2 lc0
+
+    ---with fastbytes=128---
+    Start LZMA(blktab, 12536608 bytes)
+    Yay result with pb0 lp0 lc0: 1386141
+    Yay result with pb0 lp1 lc0: 1308137
+    Yay result with pb0 lp2 lc0: 1305403
+    Yay result with pb0 lp3 lc0: 1303072
+    Yay result with pb1 lp1 lc0: 1238990
+    Yay result with pb1 lp2 lc0: 1227973
+    Yay result with pb1 lp3 lc0: 1221205
+    Yay result with pb2 lp1 lc0: 1197035
+    Yay result with pb2 lp2 lc0: 1188979
+    Yay result with pb2 lp3 lc0: 1184531
+    Yay result with pb3 lp1 lc0: 1183866
+    Yay result with pb3 lp2 lc0: 1172994
+    Yay result with pb3 lp3 lc0: 1169048
+    Best LZMA for blktab(12536608->1169048): pb3 lp3 lc0
+
+    It seems, lc=0 and pb=lp=N is a wise choice,
+    where N is 2 for packed blktab and 3 for unpacked.
+
+ FBLOCKS:
+    For SPC sound+code data, the best results
+     are between:
+      pb0 lp0 lc0 (10%)
+      pb0 lp0 lc1 (90%)
+     For inotab, these were observed:
+      pb1 lp0 lc1
+      pb2 lp0 lc0
+      pb1 lp1 lc0
+      pb3 lp1 lc0
+      pb1 lp2 lc0
+      pb2 lp1 lc0
+
+    For C source code data, the best results
+     are between:
+      pb1 lp0 lc3 (10%)
+      pb0 lp0 lc3 (90%)
+     Occasionally:
+      pb0 lp1 lc0
+      pb0 lp0 lc3 (mostly)
+      pb0 lp0 lc2
+      pb0 lp0 lc4
+     Occasionally 2:
+      pb0 lp0 lc8
+      pb0 lp0 lc4
+
+    BUT:
+    Best LZMA for fblock(204944->192060): pb0 lp4 lc8 -- surprise! (INOTAB PROBABLY)
+
+*/
+
+static UInt32 SelectDictionarySizeFor(unsigned datasize)
+{
+   #if 1
+    if(datasize >= (1 << 30U)) return 1 << 30U;
+    return datasize;
+   #else
+#ifdef __GNUC__
+    /* gnu c can optimize this switch statement into a fast binary
+     * search, but it cannot do so for the list of the if statements.
+     */
+    switch(datasize)
+    {
+        case 0 ... 512 : return 512;
+        case 513 ... 1024: return 2048;
+        case 1025 ... 4096: return 8192;
+        case 4097 ... 16384: return 32768;
+        case 16385 ... 65536: return 528288;
+        case 65537 ... 528288: return 1048576*4;
+        case 528289 ... 786432: return 1048576*16;
+        default: return 1048576*32;
+    }
+#else
+    if(datasize <= 512) return 512;
+    if(datasize <= 1024) return 1024;
+    if(datasize <= 4096) return 4096;
+    if(datasize <= 16384) return 32768;
+    if(datasize <= 65536) return 528288;
+    if(datasize <= 528288) return 1048576*4;
+    if(datasize <= 786432) reutrn 1048576*16;
+    return 32*1048576;
+#endif
+   #endif
+}
+
+static void *SzAlloc(void*, size_t size)
+    { return new unsigned char[size]; }
+static void SzFree(void*, void *address)
+    { unsigned char*a = (unsigned char*)address; delete[] a; }
+static ISzAlloc LZMAalloc = { SzAlloc, SzFree };
+
+class MemReader: public ISeqInStream
+{
+public:
+    const unsigned char* const indata;
+    const size_t         inlength;
+    size_t pos;
+public:
+    MemReader(const unsigned char* d, size_t l)
+        : ISeqInStream(), indata(d), inlength(l), pos(0)
+    {
+        Read = ReadMethod;
+    }
+    static SRes ReadMethod(void *pp, void *buf, size_t *size)
+    {
+        MemReader& p = *(MemReader*)pp;
+        size_t rem = p.inlength-p.pos;
+        size_t read = *size;
+        if(read > rem) read= rem;
+        std::memcpy(buf, &p.indata[p.pos], read);
+        *size = read;
+        p.pos += read;
+        return SZ_OK;
+    }
+};
+class MemWriter: public ISeqOutStream
+{
+public:
+    std::vector<unsigned char> buf;
+public:
+    MemWriter(): ISeqOutStream(), buf() { Write = WriteMethod; }
+
+    static size_t WriteMethod(void*pp, const void* from, size_t size)
+    {
+        MemWriter& p = *(MemWriter*)pp;
+        const unsigned char* i = (const unsigned char*)from;
+        p.buf.insert(p.buf.end(), i, i+size);
+        return size;
+    }
+};
+
+const std::vector<unsigned char> LZMACompress(const unsigned char* data, size_t length,
+    unsigned pb,
+    unsigned lp,
+    unsigned lc)
+{
+    return LZMACompress(data,length, pb,lp,lc,
+        SelectDictionarySizeFor(length));
+}
+
+const std::vector<unsigned char> LZMACompress(
+    const unsigned char* data, size_t length,
+    unsigned pb,
+    unsigned lp,
+    unsigned lc,
+    unsigned dictionarysize)
+{
+    if(!length) return std::vector<unsigned char>();
+
+    CLzmaEncProps props;
+    LzmaEncProps_Init(&props);
+    props.dictSize = dictionarysize;
+    props.pb       = pb;
+    props.lp       = lp;
+    props.lc       = lc;
+    props.fb       = LZMA_NumFastBytes;
+    props.mc       = LZMA_MatchFinderCycles;
+    props.algo     = LZMA_AlgorithmNo;
+    props.numThreads = 1;
+
+    switch(LZMA_AlgorithmNo)
+    {
+        case 0: // quick: HC4
+            props.btMode = 0;
+            props.level = 1;
+            break;
+        case 1: // full: BT4
+        default:
+            props.level = 9;
+            props.btMode       = 1;
+            props.numHashBytes = 4;
+            break;
+    }
+
+    CLzmaEncHandle p = LzmaEnc_Create(&LZMAalloc);
+    struct AutoReleaseLzmaEnc
+    {
+        AutoReleaseLzmaEnc(CLzmaEncHandle pp) : p(pp) { }
+        ~AutoReleaseLzmaEnc()
+        { LzmaEnc_Destroy(p, &LZMAalloc, &LZMAalloc); }
+        CLzmaEncHandle p;
+
+
+        AutoReleaseLzmaEnc(const AutoReleaseLzmaEnc&);
+        void operator=(const AutoReleaseLzmaEnc&);
+
+    } AutoReleaser(p); // Create a destructor that ensures
+    // that the CLzmaEncHandle is not leaked, even if an
+    // exception happens
+
+    int res = LzmaEnc_SetProps(p, &props);
+    if(res != SZ_OK)
+    {
+    Error:
+        return std::vector<unsigned char> ();
+    }
+
+    unsigned char propsEncoded[LZMA_PROPS_SIZE + 8];
+    size_t propsSize = sizeof propsEncoded;
+    res = LzmaEnc_WriteProperties(p, propsEncoded, &propsSize);
+    if(res != SZ_OK) goto Error;
+
+    MemReader is(data, length);
+    MemWriter os;
+    put_64(propsEncoded+LZMA_PROPS_SIZE, length);
+    os.buf.insert(os.buf.end(), propsEncoded, propsEncoded+LZMA_PROPS_SIZE+8);
+
+    res = LzmaEnc_Encode(p, &os, &is, 0, &LZMAalloc, &LZMAalloc);
+    if(res != SZ_OK) goto Error;
+
+    return os.buf;
+}
+
+const std::vector<unsigned char> LZMACompress(const unsigned char* data, size_t length)
+{
+    return LZMACompress(data, length,
+        LZMA_PosStateBits,
+        LZMA_LiteralPosStateBits,
+        LZMA_LiteralContextBits);
+}
+
+#undef RC_NORMALIZE
+
+const std::vector<unsigned char> LZMADeCompress
+    (const unsigned char* data, size_t length, bool& ok)
+{
+    if(length <= LZMA_PROPS_SIZE+8)
+    {
+    /*clearly_not_ok:*/
+        ok = false;
+        return std::vector<unsigned char> ();
+    }
+
+    uint_least64_t out_sizemax = get_64(&data[LZMA_PROPS_SIZE]);
+
+    /*if(out_sizemax >= (size_t)~0ULL)
+    {
+        // cannot even allocate a vector this large.
+        goto clearly_not_ok;
+    }*/
+
+    std::vector<unsigned char> result(out_sizemax);
+
+    ELzmaStatus status;
+    SizeT destlen = result.size();
+    SizeT srclen = length-(LZMA_PROPS_SIZE+8);
+    int res = LzmaDecode(
+        &result[0], &destlen,
+        &data[LZMA_PROPS_SIZE+8], &srclen,
+        &data[0], LZMA_PROPS_SIZE,
+        LZMA_FINISH_END,
+        &status,
+        &LZMAalloc);
+
+    /*
+    std::fprintf(stderr, "res=%d, status=%d, in_done=%d (buf=%d), out_done=%d (max=%d)\n",
+        res,
+        (int)status,
+        (int)srclen, (int)length,
+        (int)destlen, (int)out_sizemax);
+    */
+
+    ok = res == SZ_OK && (status == LZMA_STATUS_FINISHED_WITH_MARK
+                       || status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
+      && srclen == (length-(LZMA_PROPS_SIZE+8))
+      && destlen == out_sizemax;
+    return result;
+}
+
+const std::vector<unsigned char> LZMADeCompress
+    (const unsigned char* data, size_t length)
+{
+    bool ok_unused;
+    return LZMADeCompress(data, length, ok_unused);
+}
+
+#if 0
+#include <stdio.h>
+int main(void)
+{
+    char Buf[2048*2048];
+    int s = fread(Buf,1,sizeof(Buf),stdin);
+    std::vector<unsigned char> result = LZMADeCompress(std::vector<unsigned char>(Buf,Buf+s));
+    fwrite(&result[0],1,result.size(),stdout);
+}
+#endif
+
+const std::vector<unsigned char> LZMACompressHeavy(const unsigned char* data, size_t length,
+    const char* why)
+{
+    std::vector<unsigned char> bestresult;
+    char best[512];
+    bool first = true;
+    if(LZMA_verbose >= 1)
+    {
+        std::fprintf(stderr, "Start LZMA(%s, %u bytes)\n", why, (unsigned)length);
+        std::fflush(stderr);
+    }
+
+    unsigned minresultsize=0, maxresultsize=0;
+    unsigned sizemap[5][5][9] = {{{0}}};
+
+    bool use_small_dict = false;
+
+    for(int compress_mode = 0; compress_mode < (5*5*9); ++compress_mode)
+    {
+        const unsigned pb = compress_mode % 5;
+        const unsigned lp = (compress_mode / 5) % 5;
+        const unsigned lc = (compress_mode / 5 / 5) % 9;
+
+        std::vector<unsigned char>
+            result = use_small_dict
+                ? LZMACompress(data,length,pb,lp,lc, 4096)
+                : LZMACompress(data,length,pb,lp,lc);
+
+       {
+        sizemap[pb][lp][lc] = result.size();
+
+        if(first || result.size() < minresultsize) minresultsize = result.size();
+        if(first || result.size() > maxresultsize) maxresultsize = result.size();
+        if(first || result.size() < bestresult.size())
+        {
+            sprintf(best, "pb%u lp%u lc%u",
+                pb,lp,lc);
+            if(LZMA_verbose >= 1)
+                std::fprintf(stderr, "Yay result with %s: %u\n", best, (unsigned)result.size());
+            bestresult.swap(result);
+            first = false;
+        }
+        else
+        {
+            char tmp[512];
+            sprintf(tmp, "pb%u lp%u lc%u",
+                pb,lp,lc);
+            if(LZMA_verbose >= 2)
+                std::fprintf(stderr, "Blaa result with %s: %u\n", tmp, (unsigned)result.size());
+        }
+        if(LZMA_verbose >= 2)
+        {
+            std::fprintf(stderr, "%*s\n", (5 * (4+9+2)), "");
+            /* Visualize the size map: */
+            std::string lines[6] = {};
+            for(unsigned pbt = 0; pbt <= 4; ++pbt)
+            {
+                char buf[64]; sprintf(buf, "pb%u:%11s", pbt,"");
+                lines[0] += buf;
+
+                for(unsigned lpt = 0; lpt <= 4; ++lpt)
+                {
+                    char buf[64]; sprintf(buf, "lp%u:", lpt);
+                    std::string line;
+                    line += buf;
+                    for(unsigned lct = 0; lct <= 8; ++lct)
+                    {
+                        unsigned s = sizemap[pbt][lpt][lct];
+                        char c;
+                        if(!s) c = '.';
+                        else c = 'a' + ('z'-'a'+1)
+                                     * (s - minresultsize)
+                                     / (maxresultsize-minresultsize+1);
+                        line += c;
+                    }
+                    lines[1 + lpt] += line + "  ";
+                }
+            }
+            for(unsigned a=0; a<6; ++a) std::fprintf(stderr, "%s\n", lines[a].c_str());
+            std::fprintf(stderr, "\33[%uA", 7);
+        }
+       }
+    }
+    if(LZMA_verbose >= 2)
+        std::fprintf(stderr, "\n\n\n\n\n\n\n\n");
+
+    if(LZMA_verbose >= 1)
+    {
+        std::fprintf(stderr, "Best LZMA for %s(%u->%u): %s\n",
+            why,
+            (unsigned)length,
+            (unsigned)bestresult.size(),
+            best);
+    }
+    std::fflush(stderr);
+    return bestresult;
+}
+
+/*
+
+The LZMA compression power is controlled by these parameters:
+  Dictionary size (we use the maximum)
+  Compression algorithm (we use BT4, the heaviest available)
+  Number of fast bytes (we use the maximum)
+  pb (0..4), lp (0..4) and lc (0..8) -- the effect of these depends on data.
+
+Since the only parameters whose effect depends on the data to be compressed
+are the three (pb, lp, lc), the "auto" and "full" compression algorithms
+only try to find the optimal values for those.
+
+The "auto" LZMA compression algorithm is based on these two assumptions:
+  - It is possible to find the best value for each component (pb, lp, lc)
+    by individually testing the most effective one of them while keeping
+    the others static.
+    I.e.,    step 1: pb=<find best>, lp=0, lc=0
+             step 2: pb=<use result>, lp=<find best>, lc=0
+             step 3: pb=<use result>, lp=<use result>, lc=<find best>
+             final: pb=<use result>, lp=<use result>, lc=<use result>
+  - That the effect of each of these components forms a parabolic function
+    that has a starting point, ending point, and possibly a mountain or a
+    valley somewhere in the middle, but never a valley _and_ a mountain, nor
+    two valleys nor two mountains.
+These assumptions are not always true, but it gets very close to the optimum.
+
+The ParabolicFinder class below finds the lowest point in a parabolic curve
+with a small number of tests, determining the shape of the curve by sampling
+a few cue values as needed.
+
+The algorithm is like this:
+  Never check any value more than once.
+  Check the first two values.
+  If they differ, then check the last in sequence.
+    If not, then check everything in sequential order.
+  If the first two values and the last form an ascending sequence, accept the first value.
+    If they form a descending sequence, start Focus Mode
+    such that the focus lower limit is index 2 and upper
+    limit is the second last. Then check the second last.
+      If they don't, then check the third value of sequence,
+      and everything else in sequential order.
+  If in Focus Mode, check if being in the lower or upper end of the focus.
+    If in upper end, check if the current value is bigger than the next one.
+      If it is, end the process, because the smallest value has already been found.
+        If not, next check the value at focus_low, and increase focus_low.
+    If in lower end, check if the current value is bigger than the previous one.
+      If it is, end the process, because the smallest value has already been found.
+        If not, next check the value at focus_high, and decrease focus_high.
+
+For any sample space, it generally does 3 tests, but if it detects a curve
+forming a valley, it may do more.
+
+Note that ParabolicFinder does not _indicate_ the lowest value. It leaves that
+to the caller. It just stops searching when it thinks that no lower value will
+be found.
+
+Note: The effect of pb, lp and lc depend also on the dictionary size setting
+and compression algorithm. You cannot estimate the optimal value for those
+parameters reliably using different compression settings than in the actual case.
+
+*/
+class ParabolicFinder
+{
+public:
+    enum QueryState      { Unknown, Pending, Done };
+    enum InstructionType { HereYouGo, WaitingResults, End };
+public:
+    ParabolicFinder(unsigned Start, unsigned End)
+        : begin(Start),
+          results(End-Start+1, 0),
+          state  (End-Start+1, Unknown),
+          LeftRightSwap(false)
+    {
+    }
+
+    InstructionType GetNextInstruction(unsigned& attempt)
+    {
+      InstructionType result = End;
+
+      const int Last  = begin + results.size()-1;
+
+      #define RetIns(n) do{ result = (n); goto DoneCrit; }while(0)
+      #define RetVal(n) do{ state[attempt = (n)] = Pending; RetIns(HereYouGo); }while(0)
+
+      {
+        /*
+        std::fprintf(stderr, "NextInstruction...");
+        for(unsigned a=0; a<state.size(); ++a)
+            std::fprintf(stderr, " %u=%s", a,
+                state[a]==Unknown?"??"
+               :state[a]==Done?"Ok"
+               :"..");
+        std::fprintf(stderr, "\n");*/
+
+        if(CountUnknown() == 0)
+        {
+            // No unassigned slots remain. Don't need more workers.
+            RetIns(End);
+        }
+
+        if(1) // scope for local variables
+        {
+            // Alternate which side to do next if both are available.
+            bool LeftSideFirst = LeftRightSwap ^= 1;
+
+            // Check left side descend type
+            int LeftSideNext = -1; bool LeftSideDoable = false;
+            for(int c=0; c<=Last; ++c)
+                switch(state[c])
+                {
+                    case Unknown: LeftSideNext = c; LeftSideDoable = true; goto ExitLeftSideFor;
+                    case Pending: LeftSideNext = c; LeftSideDoable = false; goto ExitLeftSideFor;
+                    case Done:
+                        if(c == 0) continue;
+                        if(results[c] > results[c-1])
+                        {
+                            // Left side stopped descending.
+                            if(state[Last] != Unknown) RetIns(End);
+                            goto ExitLeftSideFor;
+                        }
+                        else if(results[c] == results[c-1])
+                            LeftSideFirst = true;
+                }
+        ExitLeftSideFor: ;
+
+            // Check right side descend type
+            int RightSideNext = -1; bool RightSideDoable = false;
+            for(int c=Last; c>=0; --c)
+                switch(state[c])
+                {
+                    case Unknown: RightSideNext = c; RightSideDoable = true; goto ExitRightSideFor;
+                    case Pending: RightSideNext = c; RightSideDoable = false; goto ExitRightSideFor;
+                    case Done:
+                        if(c == Last) continue;
+                        if(results[c] > results[c+1])
+                        {
+                            // Right side stopped descending.
+                            if(state[0] != Unknown) RetIns(End);
+                            goto ExitRightSideFor;
+                        }
+                        else if(results[c] == results[c+1])
+                            LeftSideFirst = false;
+                }
+        ExitRightSideFor: ;
+
+            if(!LeftSideFirst)
+                 { std::swap(LeftSideDoable, RightSideDoable);
+                   std::swap(LeftSideNext,   RightSideNext); }
+
+            if(LeftSideDoable) RetVal(LeftSideNext);
+            if(RightSideDoable) RetVal(RightSideNext);
+
+            // If we have excess threads and work to do, give them something
+            if(CountHandled() > 2) if(LeftSideNext >= 0) RetVal(LeftSideNext);
+            if(CountHandled() > 3) if(RightSideNext >= 0) RetVal(RightSideNext);
+
+            RetIns(WaitingResults);
+        }
+
+      DoneCrit: ;
+      }
+      return result;
+    }
+
+    void GotResult(unsigned attempt, unsigned value)
+    {
+      {
+        results[attempt] = value;
+        state[attempt]   = Done;
+      }
+    }
+
+private:
+    unsigned CountUnknown() const
+    {
+        unsigned result=0;
+        for(size_t a=0, b=state.size(); a<b; ++a)
+            if(state[a] == Unknown) ++result;
+        return result;
+    }
+    unsigned CountHandled() const
+    {
+        return state.size() - CountUnknown();
+    }
+private:
+    unsigned begin;
+    std::vector<unsigned>   results;
+    std::vector<QueryState> state;
+    bool LeftRightSwap;
+};
+
+static void LZMACompressAutoHelper(
+    const unsigned char* data, size_t length,
+    bool use_small_dict,
+    const char* why,
+    unsigned& pb, unsigned& lp, unsigned& lc,
+    unsigned& which_iterate, ParabolicFinder& finder,
+    bool&first, std::vector<unsigned char>& bestresult)
+{
+    for(;;)
+    {
+        unsigned t=0;
+        switch(finder.GetNextInstruction(t))
+        {
+            case ParabolicFinder::End:
+                return;
+            case ParabolicFinder::HereYouGo:
+                break;
+            case ParabolicFinder::WaitingResults:
+                ForceSwitchThread();
+                continue;
+        }
+
+        const unsigned try_pb = &which_iterate == &pb ? t : pb;
+        const unsigned try_lp = &which_iterate == &lp ? t : lp;
+        const unsigned try_lc = &which_iterate == &lc ? t : lc;
+
+        if(LZMA_verbose >= 2)
+            std::fprintf(stderr, "%s:Trying pb%u lp%u lc%u\n",
+                why,try_pb,try_lp,try_lc);
+
+        std::vector<unsigned char> result = use_small_dict
+            ? LZMACompress(data,length,try_pb,try_lp,try_lc, 65536)
+            : LZMACompress(data,length,try_pb,try_lp,try_lc);
+
+        if(LZMA_verbose >= 2)
+            std::fprintf(stderr, "%s:       pb%u lp%u lc%u -> %u\n",
+                why,try_pb,try_lp,try_lc, (unsigned)result.size());
+
+        finder.GotResult(t, result.size());
+
+      {
+        if(first || result.size() <= bestresult.size())
+        {
+            first    = false;
+            bestresult.swap(result);
+            which_iterate = t;
+        }
+      }
+    }
+}
+
+
+const std::vector<unsigned char> LZMACompressAuto(const unsigned char* data, size_t length,
+    const char* why)
+{
+    if(LZMA_verbose >= 1)
+    {
+        std::fprintf(stderr, "Start LZMA(%s, %u bytes)\n", why, (unsigned)length);
+        std::fflush(stderr);
+    }
+
+    unsigned backup_algorithm = LZMA_AlgorithmNo;
+
+    bool use_small_dict = false;//length >= 1048576;
+
+    if(use_small_dict) LZMA_AlgorithmNo = 0;
+
+    unsigned pb=0, lp=0, lc=0;
+
+    std::vector<unsigned char> bestresult;
+
+  {
+    ParabolicFinder pb_finder(0,4);
+    ParabolicFinder lp_finder(0,4);
+    ParabolicFinder lc_finder(0,8);
+    bool first=true;
+   {
+    /* Using parallelism here. However, we need barriers after
+     * each step, because the comparisons are made based on the
+     * result size, and if the pb/lp/lc values other than the
+     * one being focused change, it won't work. Only one parameter
+     * must change in the loop.
+     */
+
+    /* step 1: find best value in pb axis */
+    LZMACompressAutoHelper(data,length,use_small_dict,why,
+        pb, lp, lc,
+        pb, pb_finder, first, bestresult);
+
+
+    lp_finder.GotResult(lp, bestresult.size());
+
+    /* step 2: find best value in lp axis */
+    LZMACompressAutoHelper(data,length,use_small_dict,why,
+        pb, lp, lc,
+        lp, lp_finder, first, bestresult);
+
+    lc_finder.GotResult(lc, bestresult.size());
+
+    /* step 3: find best value in lc axis */
+    LZMACompressAutoHelper(data,length,use_small_dict,why,
+        pb, lp, lc,
+        lc, lc_finder, first, bestresult);
+   }
+  }
+
+    if(use_small_dict || LZMA_AlgorithmNo != backup_algorithm)
+    {
+        LZMA_AlgorithmNo = backup_algorithm;
+        bestresult = LZMACompress(data,length, pb,lp,lc);
+    }
+
+    if(LZMA_verbose >= 1)
+    {
+        std::fprintf(stderr, "Best LZMA for %s(%u->%u): pb%u lp%u lc%u\n",
+            why,
+            (unsigned)length,
+            (unsigned)bestresult.size(),
+            pb,lp,lc);
+    }
+    std::fflush(stderr);
+
+    return bestresult;
+}
+
+const std::vector<unsigned char>
+    DoLZMACompress(int HeavyLevel,
+        const unsigned char* data, size_t length,
+        const char* why)
+{
+    if(HeavyLevel >= 2) return LZMACompressHeavy(data,length, why);
+    if(HeavyLevel >= 1) return LZMACompressAuto(data,length, why);
+    return LZMACompress(data,length);
+}
+
+extern "C" {
+
+/**
+ * Compress a buffer with lzma
+ * Don't copy the result back if it is too large.
+ * @param in a pointer to the buffer
+ * @param in_len the length in bytes
+ * @param out a pointer to a buffer of at least size in_len
+ * @param out_len a pointer to the compressed length of in
+ */
+
+void do_lzma_compress(char *in, int in_len, char *out, int *out_len) {
+	std::vector<unsigned char> result;
+	result = LZMACompress(std::vector<unsigned char>(in, in + in_len));
+	*out_len = result.size();
+	if (*out_len < in_len)
+		std::memcpy(out, &result[0], *out_len);
+}
+
+void do_lzma_uncompress(char *dst, int dst_len, char *src, int src_len) {
+	std::vector<unsigned char> result;
+	result = LZMADeCompress(std::vector<unsigned char>(src, src + src_len));
+	if (result.size() <= (SizeT)dst_len)
+		std::memcpy(dst, &result[0], result.size());
+	else
+	{
+		fprintf(stderr, "Not copying %d bytes to %d-byte buffer!\n",
+			(unsigned int)result.size(), dst_len);
+		exit(1);
+	}
+}
+
+}
+