Move hashEveryLog to cctxParams and update cli

This commit is contained in:
Stella Lau 2017-09-01 14:52:51 -07:00
parent 767a0b3be1
commit a1f04d518d
10 changed files with 105 additions and 23 deletions

View File

@ -256,7 +256,6 @@ typedef struct {
typedef struct {
ldmEntry_t* hashTable;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U32 hashEveryLog; /* Log number of entries to skip */
U64 hashPower; /* Used to compute the rolling hash.
* Depends on ldmParams.minMatchLength */
} ldmState_t;
@ -266,6 +265,7 @@ typedef struct {
U32 hashLog; /* Log size of hashTable */
U32 bucketLog; /* Log number of buckets, at most 4 */
U32 minMatchLength; /* Minimum match length */
U32 hashEveryLog; /* Log number of entries to skip */
} ldmParams_t;
typedef struct {

View File

@ -42,6 +42,7 @@ typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZS
#define LDM_WINDOW_LOG 27
#define LDM_HASH_LOG 20
#define LDM_HASH_CHAR_OFFSET 10
#define LDM_HASHEVERYLOG_NOTSET 9999
/*-*************************************
@ -320,6 +321,7 @@ static size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
params->hashLog = LDM_HASH_LOG;
params->bucketLog = LDM_BUCKET_SIZE_LOG;
params->minMatchLength = LDM_MIN_MATCH_LENGTH;
params->hashEveryLog = LDM_HASHEVERYLOG_NOTSET;
return 0;
}
@ -385,6 +387,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
if (cctx->cdict) return ERROR(stage_wrong);
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
case ZSTD_p_ldmHashEveryLog:
if (cctx->cdict) return ERROR(stage_wrong);
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
default: return ERROR(parameter_unsupported);
}
}
@ -503,6 +509,13 @@ size_t ZSTD_CCtxParam_setParameter(
params->ldmParams.minMatchLength = value;
return 0;
case ZSTD_p_ldmHashEveryLog :
if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) {
return ERROR(parameter_outOfBound);
}
params->ldmParams.hashEveryLog = value;
return 0;
default: return ERROR(parameter_unsupported);
}
}
@ -538,7 +551,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) );
}
/* Copy long distance matching parameter */
/* Copy long distance matching parameters */
cctx->requestedParams.ldmParams = params->ldmParams;
/* customMem is used only for create/free params and can be ignored */
@ -742,7 +755,6 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa
+ (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
/* Ldm parameters can not currently be changed */
size_t const ldmSpace = params->ldmParams.enableLdm ?
ZSTD_ldm_getTableSize(params->ldmParams.hashLog,
params->ldmParams.bucketLog) : 0;
@ -813,7 +825,8 @@ static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
(ldmParams1.enableLdm == ldmParams2.enableLdm &&
ldmParams1.hashLog == ldmParams2.hashLog &&
ldmParams1.bucketLog == ldmParams2.bucketLog &&
ldmParams1.minMatchLength == ldmParams2.minMatchLength);
ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
ldmParams1.hashEveryLog == ldmParams2.hashEveryLog);
}
/** Equivalence for resetCCtx purposes */
@ -866,6 +879,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
if (crp == ZSTDcrp_continue) {
if (ZSTD_equivalentParams(params, zc->appliedParams)) {
DEBUGLOG(5, "ZSTD_equivalentParams()==1");
assert(!(params.ldmParams.enableLdm &&
params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET));
zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
zc->entropy->offcode_repeatMode = FSE_repeat_none;
zc->entropy->matchlength_repeatMode = FSE_repeat_none;
@ -874,9 +889,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
} }
if (params.ldmParams.enableLdm) {
zc->ldmState.hashEveryLog =
params.cParams.windowLog < params.ldmParams.hashLog ?
0 : params.cParams.windowLog - params.ldmParams.hashLog;
if (params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET) {
params.ldmParams.hashEveryLog =
params.cParams.windowLog < params.ldmParams.hashLog ?
0 : params.cParams.windowLog - params.ldmParams.hashLog;
}
zc->ldmState.hashPower =
ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
}
@ -3159,19 +3176,19 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
*
* Gets the small hash, checksum, and tag from the rollingHash.
*
* If the tag matches (1 << ldmState->hashEveryLog)-1, then
* If the tag matches (1 << ldmParams.hashEveryLog)-1, then
* creates an ldmEntry from the offset, and inserts it into the hash table.
*
* hBits is the length of the small hash, which is the most significant hBits
* of rollingHash. The checksum is the next 32 most significant bits, followed
* by ldmState->hashEveryLog bits that make up the tag. */
* by ldmParams.hashEveryLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
U64 rollingHash, U32 hBits,
U32 const offset,
ldmParams_t const ldmParams)
{
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog);
U32 const tagMask = (1 << ldmState->hashEveryLog) - 1;
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
U32 const tagMask = (1 << ldmParams.hashEveryLog) - 1;
if (tag == tagMask) {
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
@ -3349,7 +3366,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
const U64 hashPower = ldmState->hashPower;
const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog;
const U32 ldmBucketSize = (1 << ldmParams.bucketLog);
const U32 ldmTagMask = (1 << ldmState->hashEveryLog) - 1;
const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
seqStore_t* const seqStorePtr = &(cctx->seqStore);
const BYTE* const base = cctx->base;
const BYTE* const istart = (const BYTE*)src;
@ -3388,7 +3405,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
lastHashed = ip;
/* Do not insert and do not look for a match */
if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) !=
if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
ldmTagMask) {
ip++;
continue;
@ -3546,12 +3563,12 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic(
ZSTD_CCtx* ctx,
const void* src, size_t srcSize)
{
ldmState_t* ldmState = &(ctx->ldmState);
ldmState_t* const ldmState = &(ctx->ldmState);
const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
const U64 hashPower = ldmState->hashPower;
const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog;
const U32 ldmBucketSize = (1 << ldmParams.bucketLog);
const U32 ldmTagMask = (1 << ctx->ldmState.hashEveryLog) - 1;
const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
seqStore_t* const seqStorePtr = &(ctx->seqStore);
const BYTE* const base = ctx->base;
const BYTE* const dictBase = ctx->dictBase;
@ -3594,7 +3611,7 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic(
}
lastHashed = ip;
if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) !=
if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
ldmTagMask) {
/* Don't insert and don't look for a match */
ip++;

View File

@ -984,11 +984,16 @@ typedef enum {
* ZSTD_p_compressionLevel and before
* ZSTD_p_windowLog and other LDM parameters. */
ZSTD_p_ldmHashLog, /* Size of the table for long distance matching.
* Must be clamped between ZSTD_HASHLOG_MIN and
* ZSTD_HASHLOG_MAX */
* Must be clamped between ZSTD_HASHLOG_MIN and
* ZSTD_HASHLOG_MAX */
ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher.
* Must be clamped between ZSTD_LDM_SEARCHLENGTH_MIN
* and ZSTD_LDM_SEARCHLENGTH_MAX. */
* Must be clamped between ZSTD_LDM_SEARCHLENGTH_MIN
* and ZSTD_LDM_SEARCHLENGTH_MAX. */
ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the
* LDM hash table, expressed as a log2 value: a position
* is only inserted/looked up when the tag taken from
* its rolling hash equals (1 << ldmHashEveryLog) - 1,
* so larger values mean fewer entries are processed.
* The default is (windowLog - ldmHashLog), or 0 when
* windowLog < ldmHashLog, to optimize hash table
* usage. Must be clamped between 0 and
* ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */
} ZSTD_cParameter;

View File

@ -134,6 +134,23 @@ void BMK_setLdmFlag(unsigned ldmFlag) {
g_ldmFlag = ldmFlag;
}
static U32 g_ldmMinMatch = 0;
void BMK_setLdmMinMatch(unsigned ldmMinMatch) {
g_ldmMinMatch = ldmMinMatch;
}
static U32 g_ldmHashLog = 0;
void BMK_setLdmHashLog(unsigned ldmHashLog) {
g_ldmHashLog = ldmHashLog;
}
#define BMK_LDM_HASHEVERYLOG_NOTSET 9999
static U32 g_ldmHashEveryLog = BMK_LDM_HASHEVERYLOG_NOTSET;
void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
g_ldmHashEveryLog = ldmHashEveryLog;
}
/* ********************************************************
* Bench functions
**********************************************************/
@ -270,6 +287,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog);
if (g_ldmHashEveryLog != BMK_LDM_HASHEVERYLOG_NOTSET) {
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog);
}
ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);

View File

@ -26,5 +26,8 @@ void BMK_setNotificationLevel(unsigned level);
void BMK_setAdditionalParam(int additionalParam);
void BMK_setDecodeOnlyMode(unsigned decodeFlag);
void BMK_setLdmFlag(unsigned ldmFlag);
void BMK_setLdmMinMatch(unsigned ldmMinMatch);
void BMK_setLdmHashLog(unsigned ldmHashLog);
void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog);
#endif /* BENCH_H_121279284357 */

View File

@ -217,6 +217,20 @@ static U32 g_ldmFlag = 0;
/* Enables or disables long distance matching for file compression.
 * Any non-zero `ldmFlag` is normalized to 1 before being stored. */
void FIO_setLdmFlag(unsigned ldmFlag)
{
    g_ldmFlag = (ldmFlag != 0) ? 1 : 0;
}
static U32 g_ldmHashLog = 0;
void FIO_setLdmHashLog(unsigned ldmHashLog) {
g_ldmHashLog = ldmHashLog;
}
static U32 g_ldmMinMatch = 0;
void FIO_setLdmMinMatch(unsigned ldmMinMatch) {
g_ldmMinMatch = ldmMinMatch;
}
#define FIO_LDM_HASHEVERYLOG_NOTSET 9999
static U32 g_ldmHashEveryLog = FIO_LDM_HASHEVERYLOG_NOTSET;
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
g_ldmHashEveryLog = ldmHashEveryLog;
}
/*-*************************************
@ -406,6 +420,11 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) );
if (g_ldmHashEveryLog != FIO_LDM_HASHEVERYLOG_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) );
}
/* compression parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );

View File

@ -57,6 +57,9 @@ void FIO_setNbThreads(unsigned nbThreads);
void FIO_setBlockSize(unsigned blockSize);
void FIO_setOverlapLog(unsigned overlapLog);
void FIO_setLdmFlag(unsigned ldmFlag);
void FIO_setLdmHashLog(unsigned ldmHashLog);
void FIO_setLdmMinMatch(unsigned ldmMinMatch);
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
/*-*************************************

View File

@ -72,7 +72,11 @@ static const unsigned g_defaultMaxDictSize = 110 KB;
static const int g_defaultDictCLevel = 3;
static const unsigned g_defaultSelectivityLevel = 9;
#define OVERLAP_LOG_DEFAULT 9999
#define LDM_HASHEVERYLOG_DEFAULT 9999
static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
static U32 g_ldmHashLog = 0;
static U32 g_ldmMinMatch = 0;
static U32 g_ldmHashEveryLog = LDM_HASHEVERYLOG_DEFAULT;
/*-************************************
@ -305,6 +309,9 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi
if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmHlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmSlen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmHashEveryLog=")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
return 0;
}
@ -724,6 +731,9 @@ int main(int argCount, const char* argv[])
BMK_setNbThreads(nbThreads);
BMK_setNbSeconds(bench_nbSeconds);
BMK_setLdmFlag(ldmFlag);
BMK_setLdmMinMatch(g_ldmMinMatch);
BMK_setLdmHashLog(g_ldmHashLog);
BMK_setLdmHashEveryLog(g_ldmHashEveryLog);
BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio);
#endif
(void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio;
@ -792,6 +802,12 @@ int main(int argCount, const char* argv[])
FIO_setNbThreads(nbThreads);
FIO_setBlockSize((U32)blockSize);
FIO_setLdmFlag(ldmFlag);
FIO_setLdmHashLog(g_ldmHashLog);
FIO_setLdmMinMatch(g_ldmMinMatch);
if (g_ldmHashEveryLog != LDM_HASHEVERYLOG_DEFAULT) {
FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
}
if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog);
if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams);

View File

@ -1342,7 +1342,6 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
CHECK_Z ( ZSTD_CCtx_setParameter(refCtx, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed)&255) );
if (FUZ_rand(&lseed) & 0xF) {
CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) );
} else {

View File

@ -1381,8 +1381,6 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) );
if (FUZ_rand(&lseed) & 7) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmMinMatch, FUZ_rand(&lseed) % 128 + 4, useOpaqueAPI ) );
if (FUZ_rand(&lseed) & 7) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmHashLog, FUZ_rand(&lseed) % 18 + 10, useOpaqueAPI ) );
/* unconditionally set, to be sync with decoder */
/* mess with frame parameters */