From 3536262f70abddae45f45455d3735d29282100f6 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 14 Apr 2022 14:19:19 -0400 Subject: [PATCH 1/8] Port noDict pipeline --- lib/compress/zstd_fast.c | 271 ++++++++++++++++++++++++++++++--------- 1 file changed, 209 insertions(+), 62 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 5da108c62..7b1d0dd43 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -587,11 +587,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + U32 const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); @@ -604,6 +603,29 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* ip2; + const BYTE* ip3; + U32 current0; + + + size_t hash0; /* hash for ip0 */ + size_t hash1; /* hash for ip1 */ + U32 idx; /* match idx for ip0 */ + const BYTE* idxBase; /* base pointer for idx */ + U32 mval; /* src or dict value at match idx */ + + U32 offcode; + const BYTE* match0; + size_t mLength; + const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */ + + size_t step; + const BYTE* nextStep; + const size_t kStepIncr = (1 << (kSearchStrength - 1)); (void)hasStep; /* not currently specialized on whether it's accelerated */ @@ -613,75 +635,200 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( if (prefixStartIndex == dictStartIndex) return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hlog, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const U32 curr = (U32)(ip-base); - const U32 repIndex = curr + 1 - offset_1; + /* start each op */ +_start: /* Requires: ip0 */ + + step = stepSize; + nextStep = ip0 + kStepIncr; + + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ + ip1 = ip0 + 1; + ip2 = ip0 + step; + ip3 = ip2 + 1; + + if (ip3 >= ilimit) { + goto _cleanup; + } + + hash0 = ZSTD_hashPtr(ip0, hlog, mls); + hash1 = ZSTD_hashPtr(ip1, hlog, mls); + + idx = hashTable[hash0]; + idxBase = idx < prefixStartIndex ? dictBase : base; + + do { + /* load repcode match for ip[2]*/ + const U32 current2 = (U32)(ip2 - base); + const U32 repIndex = current2 - offset_1; const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - hashTable[h] = curr; /* update hash table */ - DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + U32 rval; - if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ - & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, rLength); - ip += rLength; - anchor = ip; + /* load repcode match for ip[2] */ + assert(offset_1 > 0); + if ( ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ + & (offset_1 < current2 - dictStartIndex) ) ) { + rval = MEM_read32(repBase + repIndex); } else { - if ( (matchIndex < dictStartIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } - { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - U32 const offset = curr - matchIndex; - size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = offset; /* update offset history */ - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - ip += mLength; - anchor = ip; - } } + rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ + } - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* check repcode at ip[2] */ + if (MEM_read32(ip2) == rval) { + ip0 = ip2; + match0 = repBase + repIndex; + matchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + assert((match0 != prefixStart) & (match0 != dictStart)); + mLength = ip0[-1] == match0[-1]; + ip0 -= mLength; + match0 -= mLength; + offcode = REPCODE1_TO_OFFBASE; + mLength += 4; + goto _match; + } + + /* load match for ip[0] */ + if (idx >= dictStartIndex) { + mval = MEM_read32(idxBase + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + goto _offset; + } + + /* lookup ip[1] */ + idx = hashTable[hash1]; + idxBase = idx < prefixStartIndex ? dictBase : base; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip3; + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* load match for ip[0] */ + if (idx >= dictStartIndex) { + mval = MEM_read32(idxBase + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + goto _offset; + } + + /* lookup ip[1] */ + idx = hashTable[hash1]; + idxBase = idx < prefixStartIndex ? dictBase : base; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip0 + step; + ip3 = ip1 + step; + + /* calculate step */ + if (ip2 >= nextStep) { + step++; + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + nextStep += kStepIncr; + } + } while (ip3 < ilimit); + +_cleanup: + /* Note that there are probably still a couple positions we could search. + * However, it seems to be a meaningful performance hit to try to search + * them. So let's not. */ /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; /* Return the last literals size */ return (size_t)(iend - anchor); + +_offset: /* Requires: ip0, idx */ + + /* Compute the offset code. */ + { U32 const offset = current0 - idx; + const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart; + matchEnd = idx < prefixStartIndex ? dictEnd : iend; + match0 = idxBase + idx; + offset_2 = offset_1; + offset_1 = offset; + offcode = OFFSET_TO_OFFBASE(offset); + mLength = 4; + + /* Count the backwards match length. */ + while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) { + ip0--; + match0--; + mLength++; + } } + +_match: /* Requires: ip0, match0, offcode */ + + /* Count the forward length. */ + assert(matchEnd != 0); + mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart); + + ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); + + ip0 += mLength; + anchor = ip0; + + /* write next hash table entry */ + if (ip1 < ip0) { + hashTable[hash1] = (U32)(ip1 - base); + } + + /* Fill table and check for immediate repcode. */ + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + assert(offset_2 > 0); + while (ip0 <= ilimit) { + U32 const repIndex2 = (U32)(ip0-base) - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= (U32)(ip0-base) - dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += repLength2; + anchor = ip0; + continue; + } + break; + } } + + goto _start; } ZSTD_GEN_FAST_FN(extDict, 4, 0) From 2820efe7ec931906ce052771afc04ddd12a8cfa8 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 19 Apr 2022 11:36:06 -0400 Subject: [PATCH 2/8] Nits --- lib/compress/zstd_fast.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 7b1d0dd43..e201e8620 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -603,7 +603,6 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; const BYTE* ip0 = istart; const BYTE* ip1; @@ -616,7 +615,6 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( size_t hash1; /* hash for ip1 */ U32 idx; /* match idx for ip0 */ const BYTE* idxBase; /* base pointer for idx */ - U32 mval; /* src or dict value at match idx */ U32 offcode; const BYTE* match0; @@ -657,13 +655,13 @@ _start: /* Requires: ip0 */ idxBase = idx < prefixStartIndex ? dictBase : base; do { - /* load repcode match for ip[2]*/ + U32 mval; /* src or dict value at match idx */ + + /* load repcode match for ip[2] */ const U32 current2 = (U32)(ip2 - base); const U32 repIndex = current2 - offset_1; const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; U32 rval; - - /* load repcode match for ip[2] */ assert(offset_1 > 0); if ( ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ & (offset_1 < current2 - dictStartIndex) ) ) { @@ -762,13 +760,13 @@ _cleanup: * them. So let's not. */ /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + rep[0] = offset_1; + rep[1] = offset_2; /* Return the last literals size */ return (size_t)(iend - anchor); -_offset: /* Requires: ip0, idx */ +_offset: /* Requires: ip0, idx, idxBase */ /* Compute the offset code. */ { U32 const offset = current0 - idx; @@ -787,7 +785,7 @@ _offset: /* Requires: ip0, idx */ mLength++; } } -_match: /* Requires: ip0, match0, offcode */ +_match: /* Requires: ip0, match0, offcode, matchEnd */ /* Count the forward length. */ assert(matchEnd != 0); From 809f65291266de966ba4262220992f5f7e7903a0 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 20 Apr 2022 11:50:00 -0400 Subject: [PATCH 3/8] Optimize repcode predicate, hardcode hasStep == 0 scenario, cosmetic fixes --- lib/compress/zstd_fast.c | 151 +++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 70 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index e201e8620..c6e9dd33a 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -587,7 +587,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const istart = (const BYTE*)src; @@ -625,14 +625,18 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* nextStep; const size_t kStepIncr = (1 << (kSearchStrength - 1)); - (void)hasStep; /* not currently specialized on whether it's accelerated */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); + { U32 const curr = (U32)(ip0 - base); + U32 const maxRep = curr - dictStartIndex; + if (offset_2 >= maxRep) offset_2 = 0; + if (offset_1 >= maxRep) offset_1 = 0; + } + /* start each op */ _start: /* Requires: ip0 */ @@ -655,51 +659,44 @@ _start: /* Requires: ip0 */ idxBase = idx < prefixStartIndex ? dictBase : base; do { - U32 mval; /* src or dict value at match idx */ + { /* load repcode match for ip[2] */ + U32 const current2 = (U32)(ip2 - base); + U32 const repIndex = current2 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + U32 rval; + if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ + & (offset_1 > 0) ) { + rval = MEM_read32(repBase + repIndex); + } else { + rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ + } - /* load repcode match for ip[2] */ - const U32 current2 = (U32)(ip2 - base); - const U32 repIndex = current2 - offset_1; - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - U32 rval; - assert(offset_1 > 0); - if ( ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ - & (offset_1 < current2 - dictStartIndex) ) ) { - rval = MEM_read32(repBase + repIndex); - } else { - rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ - } + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; - /* write back hash table entry */ - current0 = (U32)(ip0 - base); - hashTable[hash0] = current0; + /* check repcode at ip[2] */ + if (MEM_read32(ip2) == rval) { + ip0 = ip2; + match0 = repBase + repIndex; + matchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + assert((match0 != prefixStart) & (match0 != dictStart)); + mLength = ip0[-1] == match0[-1]; + ip0 -= mLength; + match0 -= mLength; + offcode = REPCODE1_TO_OFFBASE; + mLength += 4; + goto _match; + } } - /* check repcode at ip[2] */ - if (MEM_read32(ip2) == rval) { - ip0 = ip2; - match0 = repBase + repIndex; - matchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - assert((match0 != prefixStart) & (match0 != dictStart)); - mLength = ip0[-1] == match0[-1]; - ip0 -= mLength; - match0 -= mLength; - offcode = REPCODE1_TO_OFFBASE; - mLength += 4; - goto _match; - } + { /* load match for ip[0] */ + U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ - /* load match for ip[0] */ - if (idx >= dictStartIndex) { - mval = MEM_read32(idxBase + idx); - } else { - mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ - } - - /* check match at ip[0] */ - if (MEM_read32(ip0) == mval) { - /* found a match! */ - goto _offset; - } + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + goto _offset; + } } /* lookup ip[1] */ idx = hashTable[hash1]; @@ -718,18 +715,14 @@ _start: /* Requires: ip0 */ current0 = (U32)(ip0 - base); hashTable[hash0] = current0; - /* load match for ip[0] */ - if (idx >= dictStartIndex) { - mval = MEM_read32(idxBase + idx); - } else { - mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ - } + { /* load match for ip[0] */ + U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ - /* check match at ip[0] */ - if (MEM_read32(ip0) == mval) { - /* found a match! */ - goto _offset; - } + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + goto _offset; + } } /* lookup ip[1] */ idx = hashTable[hash1]; @@ -760,8 +753,8 @@ _cleanup: * them. So let's not. */ /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; + rep[0] = offset_1 ? offset_1 : rep[0]; + rep[1] = offset_2 ? offset_2 : rep[1]; /* Return the last literals size */ return (size_t)(iend - anchor); @@ -808,11 +801,10 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - assert(offset_2 > 0); while (ip0 <= ilimit) { U32 const repIndex2 = (U32)(ip0-base) - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= (U32)(ip0-base) - dictStartIndex)) /* intentional overflow */ + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */ && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; @@ -829,6 +821,11 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */ goto _start; } +ZSTD_GEN_FAST_FN(extDict, 4, 1) +ZSTD_GEN_FAST_FN(extDict, 5, 1) +ZSTD_GEN_FAST_FN(extDict, 6, 1) +ZSTD_GEN_FAST_FN(extDict, 7, 1) + ZSTD_GEN_FAST_FN(extDict, 4, 0) ZSTD_GEN_FAST_FN(extDict, 5, 0) ZSTD_GEN_FAST_FN(extDict, 6, 0) @@ -839,16 +836,30 @@ size_t ZSTD_compressBlock_fast_extDict( void const* src, size_t srcSize) { U32 const mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize); - case 5 : - return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize); - case 6 : - return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize); - case 7 : - return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize); + assert(ms->dictMatchState == NULL); + if (ms->cParams.targetLength > 1) { + switch (mls) { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_4_1(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_extDict_5_1(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_extDict_6_1(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_extDict_7_1(ms, seqStore, rep, src, srcSize); + } + } else { + switch (mls) { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize); + } } } From 518cb83833074d304dfcaa93cfc16039ea4683c8 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 26 Apr 2022 17:54:25 -0400 Subject: [PATCH 4/8] Hardcode repcode safety check, fix cosmetic nits --- lib/compress/zstd_fast.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index c6e9dd33a..49a2add75 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -581,13 +581,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState( static size_t ZSTD_compressBlock_fast_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls, U32 const hasStep) + void const* src, size_t srcSize, U32 const mls, U32 const commonScenario) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; + size_t const stepSize = commonScenario ? 2 : (cParams->targetLength + !(cParams->targetLength) + 1); const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const istart = (const BYTE*)src; @@ -633,9 +633,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( { U32 const curr = (U32)(ip0 - base); U32 const maxRep = curr - dictStartIndex; - if (offset_2 >= maxRep) offset_2 = 0; - if (offset_1 >= maxRep) offset_1 = 0; - } + if (commonScenario) { + assert(offset_2 < maxRep); + assert(offset_1 < maxRep); + } else { + if (offset_2 >= maxRep) offset_2 = 0; + if (offset_1 >= maxRep) offset_1 = 0; + } } /* start each op */ _start: /* Requires: ip0 */ @@ -665,7 +669,7 @@ _start: /* Requires: ip0 */ const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; U32 rval; if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ - & (offset_1 > 0) ) { + & (commonScenario | (offset_1 > 0)) ) { rval = MEM_read32(repBase + repIndex); } else { rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ @@ -690,7 +694,9 @@ _start: /* Requires: ip0 */ } } { /* load match for ip[0] */ - U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + U32 const mval = idx >= dictStartIndex ? + MEM_read32(idxBase + idx) : + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ /* check match at ip[0] */ if (MEM_read32(ip0) == mval) { @@ -716,7 +722,9 @@ _start: /* Requires: ip0 */ hashTable[hash0] = current0; { /* load match for ip[0] */ - U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + U32 const mval = idx >= dictStartIndex ? + MEM_read32(idxBase + idx) : + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ /* check match at ip[0] */ if (MEM_read32(ip0) == mval) { @@ -804,7 +812,7 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */ while (ip0 <= ilimit) { U32 const repIndex2 = (U32)(ip0-base) - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */ + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (commonScenario | (offset_2 > 0))) /* intentional underflow */ && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; @@ -836,8 +844,17 @@ size_t ZSTD_compressBlock_fast_extDict( void const* src, size_t srcSize) { U32 const mls = ms->cParams.minMatch; + const BYTE* const istart = (const BYTE*)src; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, ms->cParams.windowLog); + U32 const curr = (U32)(istart - base); + U32 const maxRep = curr - lowLimit; + U32 const repOffsetsAreValid = (rep[0] < maxRep) & (rep[1] < maxRep); + + assert((rep[0] > 0) & (rep[1] > 0)); assert(ms->dictMatchState == NULL); - if (ms->cParams.targetLength > 1) { + if ((ms->cParams.targetLength <= 1) & repOffsetsAreValid) { switch (mls) { default: /* includes case 3 */ case 4 : From 6a2e1f7c69f32427afc2f0273d3f4fe923a98a94 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 27 Apr 2022 18:16:21 -0400 Subject: [PATCH 5/8] Revert "Hardcode repcode safety check, fix cosmetic nits" This reverts commit 518cb83833074d304dfcaa93cfc16039ea4683c8. --- lib/compress/zstd_fast.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 49a2add75..c6e9dd33a 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -581,13 +581,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState( static size_t ZSTD_compressBlock_fast_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls, U32 const commonScenario) + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - size_t const stepSize = commonScenario ? 2 : (cParams->targetLength + !(cParams->targetLength) + 1); + size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const istart = (const BYTE*)src; @@ -633,13 +633,9 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( { U32 const curr = (U32)(ip0 - base); U32 const maxRep = curr - dictStartIndex; - if (commonScenario) { - assert(offset_2 < maxRep); - assert(offset_1 < maxRep); - } else { - if (offset_2 >= maxRep) offset_2 = 0; - if (offset_1 >= maxRep) offset_1 = 0; - } } + if (offset_2 >= maxRep) offset_2 = 0; + if (offset_1 >= maxRep) offset_1 = 0; + } /* start each op */ _start: /* Requires: ip0 */ @@ -669,7 +665,7 @@ _start: /* Requires: ip0 */ const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; U32 rval; if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ - & (commonScenario | (offset_1 > 0)) ) { + & (offset_1 > 0) ) { rval = MEM_read32(repBase + repIndex); } else { rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ @@ -694,9 +690,7 @@ _start: /* Requires: ip0 */ } } { /* load match for ip[0] */ - U32 const mval = idx >= dictStartIndex ? - MEM_read32(idxBase + idx) : - MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ /* check match at ip[0] */ if (MEM_read32(ip0) == mval) { @@ -722,9 +716,7 @@ _start: /* Requires: ip0 */ hashTable[hash0] = current0; { /* load match for ip[0] */ - U32 const mval = idx >= dictStartIndex ? - MEM_read32(idxBase + idx) : - MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ /* check match at ip[0] */ if (MEM_read32(ip0) == mval) { @@ -812,7 +804,7 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */ while (ip0 <= ilimit) { U32 const repIndex2 = (U32)(ip0-base) - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (commonScenario | (offset_2 > 0))) /* intentional underflow */ + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */ && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; @@ -844,17 +836,8 @@ size_t ZSTD_compressBlock_fast_extDict( void const* src, size_t srcSize) { U32 const mls = ms->cParams.minMatch; - const BYTE* const istart = (const BYTE*)src; - const BYTE* const base = ms->window.base; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, ms->cParams.windowLog); - U32 const curr = (U32)(istart - base); - U32 const maxRep = curr - lowLimit; - U32 const repOffsetsAreValid = (rep[0] < maxRep) & (rep[1] < maxRep); - - assert((rep[0] > 0) & (rep[1] > 0)); assert(ms->dictMatchState == NULL); - if ((ms->cParams.targetLength <= 1) & repOffsetsAreValid) { + if (ms->cParams.targetLength > 1) { switch (mls) { default: /* includes case 3 */ case 4 : From ce6b69f5c593a4ee06f8a09517141dbd1ee12621 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 28 Apr 2022 14:49:45 -0400 Subject: [PATCH 6/8] Final nit --- lib/compress/zstd_fast.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index c6e9dd33a..959a392a4 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -690,7 +690,9 @@ _start: /* Requires: ip0 */ } } { /* load match for ip[0] */ - U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + U32 const mval = idx >= dictStartIndex ? + MEM_read32(idxBase + idx) : + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ /* check match at ip[0] */ if (MEM_read32(ip0) == mval) { @@ -716,7 +718,9 @@ _start: /* Requires: ip0 */ hashTable[hash0] = current0; { /* load match for ip[0] */ - U32 const mval = idx >= dictStartIndex ? MEM_read32(idxBase + idx) : MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + U32 const mval = idx >= dictStartIndex ? + MEM_read32(idxBase + idx) : + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ /* check match at ip[0] */ if (MEM_read32(ip0) == mval) { From ac371be27b443e488de824a7a0e365fd6d4ac536 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 28 Apr 2022 18:05:39 -0400 Subject: [PATCH 7/8] Remove hasStep variant (not enough wins to justify the code size increase) --- lib/compress/zstd_fast.c | 44 +++++++++++++--------------------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 959a392a4..198ccaee3 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -587,7 +587,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const istart = (const BYTE*)src; @@ -625,6 +625,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* nextStep; const size_t kStepIncr = (1 << (kSearchStrength - 1)); + (void)hasStep; /* not currently specialized on whether it's accelerated */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); /* switch to "regular" variant if extDict is invalidated due to maxDistance */ @@ -825,11 +827,6 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */ goto _start; } -ZSTD_GEN_FAST_FN(extDict, 4, 1) -ZSTD_GEN_FAST_FN(extDict, 5, 1) -ZSTD_GEN_FAST_FN(extDict, 6, 1) -ZSTD_GEN_FAST_FN(extDict, 7, 1) - ZSTD_GEN_FAST_FN(extDict, 4, 0) ZSTD_GEN_FAST_FN(extDict, 5, 0) ZSTD_GEN_FAST_FN(extDict, 6, 0) @@ -841,29 +838,16 @@ size_t ZSTD_compressBlock_fast_extDict( { U32 const mls = ms->cParams.minMatch; assert(ms->dictMatchState == NULL); - if (ms->cParams.targetLength > 1) { - switch (mls) { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_extDict_4_1(ms, seqStore, rep, src, srcSize); - case 5 : - return ZSTD_compressBlock_fast_extDict_5_1(ms, seqStore, rep, src, srcSize); - case 6 : - return ZSTD_compressBlock_fast_extDict_6_1(ms, seqStore, rep, src, srcSize); - case 7 : - return ZSTD_compressBlock_fast_extDict_7_1(ms, seqStore, rep, src, srcSize); - } - } else { - switch (mls) { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize); - case 5 : - return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize); - case 6 : - return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize); - case 7 : - return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize); - } + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize); } } From 3be9a81e46d7b0672588ecd167ff5ba9cc19a2ec Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 4 May 2022 16:05:37 -0400 Subject: [PATCH 8/8] Update results.csv --- tests/regression/results.csv | 190 +++++++++++++++++------------------ 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 3385c5049..e6a4af271 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -59,15 +59,15 @@ silesia, uncompressed literals optimal, compress silesia, huffman literals, compress cctx, 6172178 silesia, multithreaded with advanced params, compress cctx, 4842075 github, level -5, compress cctx, 204411 -github, level -5 with dict, compress cctx, 47294 +github, level -5 with dict, compress cctx, 52059 github, level -3, compress cctx, 193253 -github, level -3 with dict, compress cctx, 48047 +github, level -3 with dict, compress cctx, 46787 github, level -1, compress cctx, 175468 -github, level -1 with dict, compress cctx, 43527 +github, level -1 with dict, compress cctx, 43585 github, level 0, compress cctx, 136332 github, level 0 with dict, compress cctx, 41534 github, level 1, compress cctx, 142365 -github, level 1 with dict, compress cctx, 42157 +github, level 1 with dict, compress cctx, 42259 github, level 3, compress cctx, 136332 github, level 3 with dict, compress cctx, 41534 github, level 4, compress cctx, 136199 @@ -188,15 +188,15 @@ github, uncompressed literals optimal, zstdcli, github, huffman literals, zstdcli, 144365 github, multithreaded with advanced params, zstdcli, 167911 github.tar, level -5, zstdcli, 52114 -github.tar, level -5 with dict, zstdcli, 46502 +github.tar, level -5 with dict, zstdcli, 51074 github.tar, level -3, zstdcli, 45682 -github.tar, level -3 with dict, zstdcli, 42181 +github.tar, level -3 with dict, zstdcli, 44660 github.tar, level -1, zstdcli, 42564 -github.tar, level -1 with dict, zstdcli, 41140 +github.tar, level -1 with dict, zstdcli, 41155 github.tar, level 0, zstdcli, 38835 github.tar, level 0 with dict, zstdcli, 37999 github.tar, level 1, zstdcli, 39204 -github.tar, level 1 with dict, zstdcli, 38288 +github.tar, level 1 with dict, zstdcli, 38093 github.tar, level 3, zstdcli, 38835 github.tar, level 3 with dict, zstdcli, 37999 github.tar, level 4, zstdcli, 38897 @@ -312,8 +312,8 @@ github, level 1, advanced github, level 1 with dict, advanced one pass, 41682 github, level 1 with dict dms, advanced one pass, 41682 github, level 1 with dict dds, advanced one pass, 41682 -github, level 1 with dict copy, advanced one pass, 41674 -github, level 1 with dict load, advanced one pass, 43755 +github, level 1 with dict copy, advanced one pass, 41698 +github, level 1 with dict load, advanced one pass, 43814 github, level 3, advanced one pass, 136332 github, level 3 with dict, advanced one pass, 41148 github, level 3 with dict dms, advanced one pass, 41148 @@ -422,11 +422,11 @@ github, uncompressed literals optimal, advanced github, huffman literals, advanced one pass, 142365 github, multithreaded with advanced params, advanced one pass, 165911 github.tar, level -5, advanced one pass, 52110 -github.tar, level -5 with dict, advanced one pass, 46498 +github.tar, level -5 with dict, advanced one pass, 51070 github.tar, level -3, advanced one pass, 45678 -github.tar, level -3 with dict, advanced one pass, 42177 +github.tar, level -3 with dict, advanced one pass, 44656 github.tar, level -1, advanced one pass, 42560 -github.tar, level -1 with dict, advanced one pass, 41136 +github.tar, level -1 with dict, advanced one pass, 41151 github.tar, level 0, advanced one pass, 38831 github.tar, level 0 with dict, advanced one pass, 37995 github.tar, level 0 with dict dms, advanced one pass, 38003 @@ -434,11 +434,11 @@ github.tar, level 0 with dict dds, advanced github.tar, level 0 with dict copy, advanced one pass, 37995 github.tar, level 0 with dict load, advanced one pass, 37956 github.tar, level 1, advanced one pass, 39200 -github.tar, level 1 with dict, advanced one pass, 38284 +github.tar, level 1 with dict, advanced one pass, 38089 github.tar, level 1 with dict dms, advanced one pass, 38294 github.tar, level 1 with dict dds, advanced one pass, 38294 -github.tar, level 1 with dict copy, advanced one pass, 38284 -github.tar, level 1 with dict load, advanced one pass, 38724 +github.tar, level 1 with dict copy, advanced one pass, 38089 +github.tar, level 1 with dict load, advanced one pass, 38364 github.tar, level 3, advanced one pass, 38831 github.tar, level 3 with dict, advanced one pass, 37995 github.tar, level 3 with dict dms, advanced one pass, 38003 @@ -630,8 +630,8 @@ github, level 1, advanced github, level 1 with dict, advanced one pass small out, 41682 github, level 1 with dict dms, advanced one pass small out, 41682 github, level 1 with dict dds, advanced one pass small out, 41682 -github, level 1 with dict copy, advanced one pass small out, 41674 -github, level 1 with dict load, advanced one pass small out, 43755 +github, level 1 with dict copy, advanced one pass small out, 41698 +github, level 1 with dict load, advanced one pass small out, 43814 github, level 3, advanced one pass small out, 136332 github, level 3 with dict, advanced one pass small out, 41148 github, level 3 with dict dms, advanced one pass small out, 41148 @@ -740,11 +740,11 @@ github, uncompressed literals optimal, advanced github, huffman literals, advanced one pass small out, 142365 github, multithreaded with advanced params, advanced one pass small out, 165911 github.tar, level -5, advanced one pass small out, 52110 -github.tar, level -5 with dict, advanced one pass small out, 46498 +github.tar, level -5 with dict, advanced one pass small out, 51070 github.tar, level -3, advanced one pass small out, 45678 -github.tar, level -3 with dict, advanced one pass small out, 42177 +github.tar, level -3 with dict, advanced one pass small out, 44656 github.tar, level -1, advanced one pass small out, 42560 -github.tar, level -1 with dict, advanced one pass small out, 41136 +github.tar, level -1 with dict, advanced one pass small out, 41151 github.tar, level 0, advanced one pass small out, 38831 github.tar, level 0 with dict, advanced one pass small out, 37995 github.tar, level 0 with dict dms, advanced one pass small out, 38003 @@ -752,11 +752,11 @@ github.tar, level 0 with dict dds, advanced github.tar, level 0 with dict copy, advanced one pass small out, 37995 github.tar, level 0 with dict load, advanced one pass small out, 37956 github.tar, level 1, advanced one pass small out, 39200 -github.tar, level 1 with dict, advanced one pass small out, 38284 +github.tar, level 1 with dict, advanced one pass small out, 38089 github.tar, level 1 with dict dms, advanced one pass small out, 38294 github.tar, level 1 with dict dds, advanced one pass small out, 38294 -github.tar, level 1 with dict copy, advanced one pass small out, 38284 -github.tar, level 1 with dict load, advanced one pass small out, 38724 +github.tar, level 1 with dict copy, advanced one pass small out, 38089 +github.tar, level 1 with dict load, advanced one pass small out, 38364 github.tar, level 3, advanced one pass small out, 38831 github.tar, level 3 with dict, advanced one pass small out, 37995 github.tar, level 3 with dict dms, advanced one pass small out, 38003 @@ -864,11 +864,11 @@ github.tar, uncompressed literals, advanced github.tar, uncompressed literals optimal, advanced one pass small out, 35397 github.tar, huffman literals, advanced one pass small out, 38853 github.tar, multithreaded with advanced params, advanced one pass small out, 41525 -silesia, level -5, advanced streaming, 6963781 -silesia, level -3, advanced streaming, 6610376 -silesia, level -1, advanced streaming, 6179294 +silesia, level -5, advanced streaming, 6852424 +silesia, level -3, advanced streaming, 6503413 +silesia, level -1, advanced streaming, 6172179 silesia, level 0, advanced streaming, 4842075 -silesia, level 1, advanced streaming, 5310178 +silesia, level 1, advanced streaming, 5306426 silesia, level 3, advanced streaming, 4842075 silesia, level 4, advanced streaming, 4779186 silesia, level 5 row 1, advanced streaming, 4666323 @@ -896,13 +896,13 @@ silesia, small chain log, advanced silesia, explicit params, advanced streaming, 4795452 silesia, uncompressed literals, advanced streaming, 5120566 silesia, uncompressed literals optimal, advanced streaming, 4319518 -silesia, huffman literals, advanced streaming, 5327881 +silesia, huffman literals, advanced streaming, 5321346 silesia, multithreaded with advanced params, advanced streaming, 5120566 -silesia.tar, level -5, advanced streaming, 7043687 -silesia.tar, level -3, advanced streaming, 6671317 -silesia.tar, level -1, advanced streaming, 6187457 +silesia.tar, level -5, advanced streaming, 6853609 +silesia.tar, level -3, advanced streaming, 6505969 +silesia.tar, level -1, advanced streaming, 6179028 silesia.tar, level 0, advanced streaming, 4859271 -silesia.tar, level 1, advanced streaming, 5333896 +silesia.tar, level 1, advanced streaming, 5327377 silesia.tar, level 3, advanced streaming, 4859271 silesia.tar, level 4, advanced streaming, 4797470 silesia.tar, level 5 row 1, advanced streaming, 4677748 @@ -930,7 +930,7 @@ silesia.tar, small chain log, advanced silesia.tar, explicit params, advanced streaming, 4806873 silesia.tar, uncompressed literals, advanced streaming, 5127423 silesia.tar, uncompressed literals optimal, advanced streaming, 4310141 -silesia.tar, huffman literals, advanced streaming, 5349624 +silesia.tar, huffman literals, advanced streaming, 5341688 silesia.tar, multithreaded with advanced params, advanced streaming, 5122567 github, level -5, advanced streaming, 204411 github, level -5 with dict, advanced streaming, 46718 @@ -948,8 +948,8 @@ github, level 1, advanced github, level 1 with dict, advanced streaming, 41682 github, level 1 with dict dms, advanced streaming, 41682 github, level 1 with dict dds, advanced streaming, 41682 -github, level 1 with dict copy, advanced streaming, 41674 -github, level 1 with dict load, advanced streaming, 43755 +github, level 1 with dict copy, advanced streaming, 41698 +github, level 1 with dict load, advanced streaming, 43814 github, level 3, advanced streaming, 136332 github, level 3 with dict, advanced streaming, 41148 github, level 3 with dict dms, advanced streaming, 41148 @@ -1057,24 +1057,24 @@ github, uncompressed literals, advanced github, uncompressed literals optimal, advanced streaming, 157227 github, huffman literals, advanced streaming, 142365 github, multithreaded with advanced params, advanced streaming, 165911 -github.tar, level -5, advanced streaming, 51420 -github.tar, level -5 with dict, advanced streaming, 45495 -github.tar, level -3, advanced streaming, 45077 -github.tar, level -3 with dict, advanced streaming, 41627 -github.tar, level -1, advanced streaming, 42536 -github.tar, level -1 with dict, advanced streaming, 41198 +github.tar, level -5, advanced streaming, 52110 +github.tar, level -5 with dict, advanced streaming, 51070 +github.tar, level -3, advanced streaming, 45678 +github.tar, level -3 with dict, advanced streaming, 44656 +github.tar, level -1, advanced streaming, 42560 +github.tar, level -1 with dict, advanced streaming, 41151 github.tar, level 0, advanced streaming, 38831 github.tar, level 0 with dict, advanced streaming, 37995 github.tar, level 0 with dict dms, advanced streaming, 38003 github.tar, level 0 with dict dds, advanced streaming, 38003 github.tar, level 0 with dict copy, advanced streaming, 37995 github.tar, level 0 with dict load, advanced streaming, 37956 -github.tar, level 1, advanced streaming, 39270 -github.tar, level 1 with dict, advanced streaming, 38316 -github.tar, level 1 with dict dms, advanced streaming, 38326 -github.tar, level 1 with dict dds, advanced streaming, 38326 -github.tar, level 1 with dict copy, advanced streaming, 38316 -github.tar, level 1 with dict load, advanced streaming, 38761 +github.tar, level 1, advanced streaming, 39200 +github.tar, level 1 with dict, advanced streaming, 38089 +github.tar, level 1 with dict dms, advanced streaming, 38294 +github.tar, level 1 with dict dds, advanced streaming, 38294 +github.tar, level 1 with dict copy, advanced streaming, 38089 +github.tar, level 1 with dict load, advanced streaming, 38364 github.tar, level 3, advanced streaming, 38831 github.tar, level 3 with dict, advanced streaming, 37995 github.tar, level 3 with dict dms, advanced streaming, 38003 @@ -1180,13 +1180,13 @@ github.tar, small chain log, advanced github.tar, explicit params, advanced streaming, 41385 github.tar, uncompressed literals, advanced streaming, 41525 github.tar, uncompressed literals optimal, advanced streaming, 35397 -github.tar, huffman literals, advanced streaming, 38874 +github.tar, huffman literals, advanced streaming, 38853 github.tar, multithreaded with advanced params, advanced streaming, 41525 -silesia, level -5, old streaming, 6963781 -silesia, level -3, old streaming, 6610376 -silesia, level -1, old streaming, 6179294 +silesia, level -5, old streaming, 6852424 +silesia, level -3, old streaming, 6503413 +silesia, level -1, old streaming, 6172179 silesia, level 0, old streaming, 4842075 -silesia, level 1, old streaming, 5310178 +silesia, level 1, old streaming, 5306426 silesia, level 3, old streaming, 4842075 silesia, level 4, old streaming, 4779186 silesia, level 5, old streaming, 4666323 @@ -1199,12 +1199,12 @@ silesia, level 19, old stre silesia, no source size, old streaming, 4842039 silesia, uncompressed literals, old streaming, 4842075 silesia, uncompressed literals optimal, old streaming, 4296686 -silesia, huffman literals, old streaming, 6179294 -silesia.tar, level -5, old streaming, 7043687 -silesia.tar, level -3, old streaming, 6671317 -silesia.tar, level -1, old streaming, 6187457 +silesia, huffman literals, old streaming, 6172179 +silesia.tar, level -5, old streaming, 6853609 +silesia.tar, level -3, old streaming, 6505969 +silesia.tar, level -1, old streaming, 6179028 silesia.tar, level 0, old streaming, 4859271 -silesia.tar, level 1, old streaming, 5333896 +silesia.tar, level 1, old streaming, 5327377 silesia.tar, level 3, old streaming, 4859271 silesia.tar, level 4, old streaming, 4797470 silesia.tar, level 5, old streaming, 4677748 @@ -1217,7 +1217,7 @@ silesia.tar, level 19, old stre silesia.tar, no source size, old streaming, 4859267 silesia.tar, uncompressed literals, old streaming, 4859271 silesia.tar, uncompressed literals optimal, old streaming, 4267266 -silesia.tar, huffman literals, old streaming, 6187457 +silesia.tar, huffman literals, old streaming, 6179028 github, level -5, old streaming, 204411 github, level -5 with dict, old streaming, 46718 github, level -3, old streaming, 193253 @@ -1251,16 +1251,16 @@ github, no source size with dict, old stre github, uncompressed literals, old streaming, 136332 github, uncompressed literals optimal, old streaming, 134064 github, huffman literals, old streaming, 175468 -github.tar, level -5, old streaming, 51420 -github.tar, level -5 with dict, old streaming, 45495 -github.tar, level -3, old streaming, 45077 -github.tar, level -3 with dict, old streaming, 41627 -github.tar, level -1, old streaming, 42536 -github.tar, level -1 with dict, old streaming, 41198 +github.tar, level -5, old streaming, 52110 +github.tar, level -5 with dict, old streaming, 51070 +github.tar, level -3, old streaming, 45678 +github.tar, level -3 with dict, old streaming, 44656 +github.tar, level -1, old streaming, 42560 +github.tar, level -1 with dict, old streaming, 41151 github.tar, level 0, old streaming, 38831 github.tar, level 0 with dict, old streaming, 37995 -github.tar, level 1, old streaming, 39270 -github.tar, level 1 with dict, old streaming, 38316 +github.tar, level 1, old streaming, 39200 +github.tar, level 1 with dict, old streaming, 38089 github.tar, level 3, old streaming, 38831 github.tar, level 3 with dict, old streaming, 37995 github.tar, level 4, old streaming, 38893 @@ -1283,12 +1283,12 @@ github.tar, no source size, old stre github.tar, no source size with dict, old streaming, 38000 github.tar, uncompressed literals, old streaming, 38831 github.tar, uncompressed literals optimal, old streaming, 32134 -github.tar, huffman literals, old streaming, 42536 -silesia, level -5, old streaming advanced, 6963781 -silesia, level -3, old streaming advanced, 6610376 -silesia, level -1, old streaming advanced, 6179294 +github.tar, huffman literals, old streaming, 42560 +silesia, level -5, old streaming advanced, 6852424 +silesia, level -3, old streaming advanced, 6503413 +silesia, level -1, old streaming advanced, 6172179 silesia, level 0, old streaming advanced, 4842075 -silesia, level 1, old streaming advanced, 5310178 +silesia, level 1, old streaming advanced, 5306426 silesia, level 3, old streaming advanced, 4842075 silesia, level 4, old streaming advanced, 4779186 silesia, level 5, old streaming advanced, 4666323 @@ -1308,13 +1308,13 @@ silesia, small chain log, old stre silesia, explicit params, old streaming advanced, 4795452 silesia, uncompressed literals, old streaming advanced, 4842075 silesia, uncompressed literals optimal, old streaming advanced, 4296686 -silesia, huffman literals, old streaming advanced, 6179294 +silesia, huffman literals, old streaming advanced, 6172179 silesia, multithreaded with advanced params, old streaming advanced, 4842075 -silesia.tar, level -5, old streaming advanced, 7043687 -silesia.tar, level -3, old streaming advanced, 6671317 -silesia.tar, level -1, old streaming advanced, 6187457 +silesia.tar, level -5, old streaming advanced, 6853609 +silesia.tar, level -3, old streaming advanced, 6505969 +silesia.tar, level -1, old streaming advanced, 6179028 silesia.tar, level 0, old streaming advanced, 4859271 -silesia.tar, level 1, old streaming advanced, 5333896 +silesia.tar, level 1, old streaming advanced, 5327377 silesia.tar, level 3, old streaming advanced, 4859271 silesia.tar, level 4, old streaming advanced, 4797470 silesia.tar, level 5, old streaming advanced, 4677748 @@ -1334,7 +1334,7 @@ silesia.tar, small chain log, old stre silesia.tar, explicit params, old streaming advanced, 4806873 silesia.tar, uncompressed literals, old streaming advanced, 4859271 silesia.tar, uncompressed literals optimal, old streaming advanced, 4267266 -silesia.tar, huffman literals, old streaming advanced, 6187457 +silesia.tar, huffman literals, old streaming advanced, 6179028 silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271 github, level -5, old streaming advanced, 213265 github, level -5 with dict, old streaming advanced, 49562 @@ -1377,16 +1377,16 @@ github, uncompressed literals, old stre github, uncompressed literals optimal, old streaming advanced, 134064 github, huffman literals, old streaming advanced, 181107 github, multithreaded with advanced params, old streaming advanced, 141104 -github.tar, level -5, old streaming advanced, 51420 -github.tar, level -5 with dict, old streaming advanced, 46091 -github.tar, level -3, old streaming advanced, 45077 -github.tar, level -3 with dict, old streaming advanced, 42222 -github.tar, level -1, old streaming advanced, 42536 -github.tar, level -1 with dict, old streaming advanced, 41494 +github.tar, level -5, old streaming advanced, 52110 +github.tar, level -5 with dict, old streaming advanced, 50985 +github.tar, level -3, old streaming advanced, 45678 +github.tar, level -3 with dict, old streaming advanced, 44729 +github.tar, level -1, old streaming advanced, 42560 +github.tar, level -1 with dict, old streaming advanced, 41589 github.tar, level 0, old streaming advanced, 38831 github.tar, level 0 with dict, old streaming advanced, 38013 -github.tar, level 1, old streaming advanced, 39270 -github.tar, level 1 with dict, old streaming advanced, 38934 +github.tar, level 1, old streaming advanced, 39200 +github.tar, level 1 with dict, old streaming advanced, 38359 github.tar, level 3, old streaming advanced, 38831 github.tar, level 3 with dict, old streaming advanced, 38013 github.tar, level 4, old streaming advanced, 38893 @@ -1416,7 +1416,7 @@ github.tar, small chain log, old stre github.tar, explicit params, old streaming advanced, 41385 github.tar, uncompressed literals, old streaming advanced, 38831 github.tar, uncompressed literals optimal, old streaming advanced, 32134 -github.tar, huffman literals, old streaming advanced, 42536 +github.tar, huffman literals, old streaming advanced, 42560 github.tar, multithreaded with advanced params, old streaming advanced, 38831 github, level -5 with dict, old streaming cdict, 46718 github, level -3 with dict, old streaming cdict, 45395 @@ -1433,11 +1433,11 @@ github, level 13 with dict, old stre github, level 16 with dict, old streaming cdict, 37577 github, level 19 with dict, old streaming cdict, 37576 github, no source size with dict, old streaming cdict, 40654 -github.tar, level -5 with dict, old streaming cdict, 46276 -github.tar, level -3 with dict, old streaming cdict, 42354 -github.tar, level -1 with dict, old streaming cdict, 41662 +github.tar, level -5 with dict, old streaming cdict, 51189 +github.tar, level -3 with dict, old streaming cdict, 44821 +github.tar, level -1 with dict, old streaming cdict, 41775 github.tar, level 0 with dict, old streaming cdict, 37956 -github.tar, level 1 with dict, old streaming cdict, 38761 +github.tar, level 1 with dict, old streaming cdict, 38364 github.tar, level 3 with dict, old streaming cdict, 37956 github.tar, level 4 with dict, old streaming cdict, 37927 github.tar, level 5 with dict, old streaming cdict, 38999 @@ -1463,11 +1463,11 @@ github, level 13 with dict, old stre github, level 16 with dict, old streaming advanced cdict, 40789 github, level 19 with dict, old streaming advanced cdict, 37576 github, no source size with dict, old streaming advanced cdict, 40608 -github.tar, level -5 with dict, old streaming advanced cdict, 44307 -github.tar, level -3 with dict, old streaming advanced cdict, 41359 -github.tar, level -1 with dict, old streaming advanced cdict, 41322 +github.tar, level -5 with dict, old streaming advanced cdict, 50854 +github.tar, level -3 with dict, old streaming advanced cdict, 44571 +github.tar, level -1 with dict, old streaming advanced cdict, 41477 github.tar, level 0 with dict, old streaming advanced cdict, 38013 -github.tar, level 1 with dict, old streaming advanced cdict, 39002 +github.tar, level 1 with dict, old streaming advanced cdict, 38168 github.tar, level 3 with dict, old streaming advanced cdict, 38013 github.tar, level 4 with dict, old streaming advanced cdict, 38063 github.tar, level 5 with dict, old streaming advanced cdict, 38997