Add long distance matching as a CCtxParam

This commit is contained in:
Stella Lau 2017-08-31 15:40:16 -07:00
parent 6a546efb8c
commit 8081becadc
8 changed files with 64 additions and 42 deletions

View File

@ -281,11 +281,10 @@ typedef struct {
/* Long distance matching (LDM) state: the entry hash table, the per-bucket
 * insertion cursors, and the log2 sizing values used to lay them out.
 * NOTE(review): `bucketOffsets` and `hashEveryLog` each appear twice below
 * (once bare, once commented). This looks like the before/after lines of a
 * diff hunk shown without +/- markers rather than real duplicate members;
 * confirm against the actual header before relying on this listing. */
typedef struct {
/* table of ldmEntry_t; the reset code in this file sizes it as (1 << hashLog) entries */
ldmEntry_t* hashTable;
BYTE* bucketOffsets;
U32 ldmEnable; /* 1 if enable long distance matching */
BYTE* bucketOffsets; /* next position in bucket to insert entry */
U32 hashLog; /* log size of hashTable */
U32 bucketLog; /* log number of buckets, at most 4 */
U32 hashEveryLog;
U32 hashEveryLog; /* log number of entries to skip */
} ldmState_t;
typedef struct {
@ -313,6 +312,8 @@ struct ZSTD_CCtx_params_s {
unsigned jobSize;
unsigned overlapSizeLog;
U32 enableLdm; /* 1 if enable long distance matching */
/* For use with createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;

View File

@ -53,6 +53,7 @@ size_t ZSTD_compressBound(size_t srcSize) {
return srcSize + (srcSize >> 8) + margin;
}
/*-*************************************
* Sequence storage
***************************************/
@ -362,14 +363,11 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
case ZSTD_p_longDistanceMatching:
/* TODO */
if (cctx->cdict) return ERROR(stage_wrong);
cctx->ldmState.ldmEnable = value>0;
if (value != 0) {
ZSTD_cLevelToCParams(cctx);
cctx->requestedParams.cParams.windowLog = LDM_WINDOW_LOG;
}
return 0;
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
default: return ERROR(parameter_unsupported);
}
@ -471,8 +469,12 @@ size_t ZSTD_CCtxParam_setParameter(
return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value);
case ZSTD_p_longDistanceMatching :
/* TODO */
return ERROR(parameter_unsupported);
params->enableLdm = value>0;
if (value != 0) {
ZSTD_cLevelToCCtxParams(params);
params->cParams.windowLog = LDM_WINDOW_LOG;
}
return 0;
default: return ERROR(parameter_unsupported);
}
@ -509,6 +511,9 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) );
}
/* Copy long distance matching parameter */
cctx->requestedParams.enableLdm = params->enableLdm;
/* customMem is used only for create/free params and can be ignored */
return 0;
}
@ -675,6 +680,16 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
}
/* Estimate the space needed for long distance matching tables.
 *
 * ldmHashLog : log2 of the number of ldmEntry_t slots in the hash table.
 * bucketLog  : requested log2 bucket size; clamped to LDM_BUCKET_SIZE_LOG_MAX
 *              and to ldmHashLog.
 * @return : size in bytes of the hash table ((1 << ldmHashLog) entries of
 *           sizeof(ldmEntry_t)) plus (1 << (ldmHashLog - clampedBucketLog))
 *           additional bytes, one per bucket.
 */
static size_t ZSTD_ldm_getTableSize(U32 ldmHashLog, U32 bucketLog) {
    size_t const ldmHSize = ((size_t)1) << ldmHashLog;
    /* Clamp to ldmHashLog as well as to the maximum: the subtraction below is
     * unsigned, so a bucket log larger than the hash log would wrap around
     * and produce an out-of-range shift count (undefined behaviour). */
    size_t const ldmBucketLog =
        MIN(MIN(bucketLog, LDM_BUCKET_SIZE_LOG_MAX), ldmHashLog);
    size_t const ldmBucketSize =
        ((size_t)1) << (ldmHashLog - ldmBucketLog);
    return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
}
size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params)
{
/* Estimate CCtx size is supported for single-threaded compression only. */
@ -699,8 +714,10 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa
+ (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
/* TODO: Long distance matching is not supported */
size_t const ldmSpace = 0;
/* Ldm parameters can not currently be changed */
size_t const ldmSpace = params->enableLdm ?
ZSTD_ldm_getTableSize(LDM_HASH_LOG, LDM_BUCKET_SIZE_LOG) : 0;
size_t const neededSpace = entropySpace + tableSpace + tokenSpace +
optSpace + ldmSpace;
@ -762,7 +779,8 @@ static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
/* ZSTD_equivalentParams() :
 * Compares two ZSTD_CCtx_params values, passed by value.
 * Two return statements are listed: the first compares only cParams through
 * ZSTD_equivalentCParams(); the second additionally requires the enableLdm
 * flags to be equal.
 * NOTE(review): as listed, the second return is unreachable. Presumably the
 * first is the removed line and the second the added line of a diff hunk
 * rendered without +/- markers; confirm against the actual source. */
static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
ZSTD_CCtx_params params2)
{
/* cParams-only comparison */
return ZSTD_equivalentCParams(params1.cParams, params2.cParams);
/* cParams comparison plus matching long distance matching flag */
return ZSTD_equivalentCParams(params1.cParams, params2.cParams) &&
params1.enableLdm == params2.enableLdm;
}
/*! ZSTD_continueCCtx() :
@ -803,9 +821,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
if (crp == ZSTDcrp_continue) {
/* TODO: For now, reset if long distance matching is enabled */
if (ZSTD_equivalentParams(params, zc->appliedParams) &&
!zc->ldmState.ldmEnable) {
if (ZSTD_equivalentParams(params, zc->appliedParams)) {
DEBUGLOG(5, "ZSTD_equivalentParams()==1");
zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
zc->entropy->offcode_repeatMode = FSE_repeat_none;
@ -838,13 +854,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0;
void* ptr;
size_t const ldmHSize = ((size_t)1) << zc->ldmState.hashLog;
size_t const ldmBucketSize =
((size_t)1) << (zc->ldmState.hashLog - zc->ldmState.bucketLog);
size_t const ldmPotentialSpace =
ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
size_t const ldmSpace = zc->ldmState.ldmEnable ?
ldmPotentialSpace : 0;
size_t const ldmSpace = params.enableLdm ? ZSTD_ldm_getTableSize(zc->ldmState.hashLog, zc->ldmState.bucketLog) : 0;
/* Check if workSpace is large enough, alloc a new one if needed */
{ size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
@ -923,8 +933,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
}
/* ldm space */
if (zc->ldmState.ldmEnable) {
if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, ldmSpace);
if (params.enableLdm) {
size_t const ldmHSize = ((size_t)1) << zc->ldmState.hashLog;
size_t const ldmBucketSize =
((size_t)1) << (zc->ldmState.hashLog - zc->ldmState.bucketLog);
memset(ptr, 0, ldmSpace);
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->ldmState.hashTable = (ldmEntry_t*)ptr;
ptr = zc->ldmState.hashTable + ldmHSize;
@ -1047,7 +1060,7 @@ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reduce
/*! ZSTD_ldm_reduceTable() :
* reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
U32 const reducerValue)
U32 const reducerValue)
{
U32 u;
for (u = 0; u < size; u++) {
@ -1069,8 +1082,8 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
{ U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
{ if (zc->ldmState.ldmEnable) {
U32 const ldmHSize = 1 << LDM_HASH_LOG;
{ if (zc->appliedParams.enableLdm) {
U32 const ldmHSize = 1 << zc->ldmState.hashLog;
ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue);
}
}
@ -1683,6 +1696,7 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
/*-*************************************
* Fast Scan
***************************************/
@ -1751,6 +1765,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
@ -1983,7 +1998,6 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
U32 offset;
if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
@ -3405,7 +3419,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
/* Check immediate repcode */
while ( (ip < ilimit)
&& ( (repToConfirm[1] > 0)
&& ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
&& (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
@ -3413,7 +3427,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
/* Swap repToConfirm[1] <=> repToConfirm[0] */
{
U32 const tmpOff = repToConfirm[1];
repToConfirm[1] = repToConfirm[0];
repToConfirm[1] = repToConfirm[0];
repToConfirm[0] = tmpOff;
}
@ -3571,6 +3585,8 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic(
/* Call the block compressor on the remaining literals */
{
/* ip = current - backwardMatchLength
* The match is at (bestEntry->offset - backwardMatchLength) */
U32 const matchIndex = bestEntry->offset;
U32 const offset = current - matchIndex;
@ -3687,7 +3703,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
size_t lastLLSize;
const BYTE* anchor;
const ZSTD_blockCompressor blockCompressor =
zc->ldmState.ldmEnable ?
zc->appliedParams.enableLdm?
(zc->lowLimit < zc->dictLimit ? ZSTD_compressBlock_ldm_extDict :
ZSTD_compressBlock_ldm) :
ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
@ -4870,7 +4886,6 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
if (cctx->mtctx == NULL) return ERROR(memory_allocation);
}
DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", params.nbThreads);
CHECK_F( ZSTDMT_initCStream_internal(
cctx->mtctx,

View File

@ -196,6 +196,8 @@ static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
jobParams.cParams = params.cParams;
jobParams.fParams = params.fParams;
jobParams.compressionLevel = params.compressionLevel;
jobParams.enableLdm = params.enableLdm;
return jobParams;
}

View File

@ -978,9 +978,11 @@ typedef enum {
/* advanced parameters - may not remain available after API update */
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
ZSTD_p_longDistanceMatching, /* Enable long distance matching.
* This increases the memory usage as well as the
* window size. */
ZSTD_p_longDistanceMatching, /* Enable long distance matching. This
* increases the memory usage as well as the
* window size. Note: this should be set after
* ZSTD_p_compressionLevel and before
* ZSTD_p_windowLog. */
} ZSTD_cParameter;

View File

@ -269,12 +269,12 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
#ifdef ZSTD_NEWAPI
ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
#else

View File

@ -402,8 +402,11 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
/* compression parameters */
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
/* compression parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) );
@ -411,8 +414,6 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
/* multi-threading */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) );
/* dictionary */

View File

@ -440,6 +440,8 @@ static int basicUnitTests(U32 seed, double compressibility)
free(staticDCtxBuffer);
}
/* ZSTDMT simple MT compression test */
DISPLAYLEVEL(4, "test%3i : create ZSTDMT CCtx : ", testNb++);
{ ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
@ -1340,7 +1342,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
CHECK_Z ( ZSTD_CCtx_setParameter(refCtx, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed)&255) );
if (FUZ_rand(&lseed) & 0xF) {
CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) );
} else {
@ -1349,7 +1351,6 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
ZSTD_parameters const p = FUZ_makeParams(cPar, fPar);
CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) );
}
CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) );

View File

@ -1380,7 +1380,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_minMatch, cParams.searchLength, useOpaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) );
/* unconditionally set, to be sync with decoder */
/* mess with frame parameters */