Preliminary Support in ZSTD_compressBlock_fast_generic() for Ext Dict Ctx

This commit is contained in:
W. Felix Handte 2018-04-28 00:42:37 -04:00
parent d18a405779
commit 8d24ff0353
2 changed files with 75 additions and 19 deletions

View File

@ -250,6 +250,8 @@ struct ZSTD_CCtx_s {
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_noDictMatchState, ZSTD_hasDictMatchState } ZSTD_hasDictMatchState_e;
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);

View File

@ -45,7 +45,8 @@ FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const hlog, U32 const stepSize, U32 const mls)
U32 const hlog, U32 const stepSize, U32 const mls,
ZSTD_hasDictMatchState_e const hasDict)
{
U32* const hashTable = ms->hashTable;
const BYTE* const base = ms->window.base;
@ -59,6 +60,19 @@ size_t ZSTD_compressBlock_fast_generic(
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32* const dictHashTable = hasDict == ZSTD_hasDictMatchState ?
dms->hashTable : NULL;
const U32 dictLowestIndex = hasDict == ZSTD_hasDictMatchState ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = hasDict == ZSTD_hasDictMatchState ?
dms->window.base : NULL;
const BYTE* const dictLowest = hasDict == ZSTD_hasDictMatchState ?
dictBase + dictLowestIndex : NULL;
const BYTE* const dictEnd = hasDict == ZSTD_hasDictMatchState ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = lowestIndex - (dictEnd - dictBase);
/* init */
ip += (ip==lowest);
{ U32 const maxRep = (U32)(ip-lowest);
@ -75,19 +89,41 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* match = base + matchIndex;
hashTable[h] = current; /* update hash table */
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
if ((hasDict != ZSTD_hasDictMatchState || current >= lowestIndex + offset_1)
&& (offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex <= lowestIndex)
|| (MEM_read32(match) != MEM_read32(ip)) ) {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
{ U32 const offset = (U32)(ip-match);
if (hasDict == ZSTD_hasDictMatchState) {
U32 const dictMatchIndex = dictHashTable[h];
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex <= dictLowestIndex ||
MEM_read32(dictMatch) != MEM_read32(ip)) {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4;
{ U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
DEBUGLOG(6, "ip %p (%u) dictMatch %p (%u) idxDelta %u", ip, current, dictMatch, dictMatchIndex, dictIndexDelta);
while (((ip>anchor) & (dictMatch>dictLowest)) && (ip[-1] == dictMatch[-1])) { ip--; dictMatch--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
} else {
U32 const offset = (U32)(ip-match);
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
@ -104,6 +140,7 @@ size_t ZSTD_compressBlock_fast_generic(
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while ( (ip <= ilimit)
&& (hasDict != ZSTD_hasDictMatchState || ip - offset_2 >= istart)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
@ -132,17 +169,34 @@ size_t ZSTD_compressBlock_fast(
U32 const hlog = cParams->hashLog;
U32 const mls = cParams->searchLength;
U32 const stepSize = cParams->targetLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
if (ms->dictMatchState != NULL) {
ZSTD_hasDictMatchState_e const hdms = ZSTD_hasDictMatchState;
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, hdms);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, hdms);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, hdms);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, hdms);
}
} else {
ZSTD_hasDictMatchState_e const hdms = ZSTD_noDictMatchState;
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, hdms);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, hdms);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, hdms);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, hdms);
}
}
}