mirror of
https://github.com/facebook/zstd.git
synced 2024-12-01 01:36:53 +08:00
Add --size-hint=# option
This commit is contained in:
parent
c9072ee674
commit
dffbac5f89
@ -324,6 +324,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
|
||||
* ZSTD_c_forceAttachDict
|
||||
* ZSTD_c_literalCompressionMode
|
||||
* ZSTD_c_targetCBlockSize
|
||||
* ZSTD_c_srcSizeHint
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
* also, the enums values themselves are unstable and can still change.
|
||||
@ -334,6 +335,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
|
||||
ZSTD_c_experimentalParam4=1001,
|
||||
ZSTD_c_experimentalParam5=1002,
|
||||
ZSTD_c_experimentalParam6=1003,
|
||||
ZSTD_c_experimentalParam7=1004,
|
||||
} ZSTD_cParameter;
|
||||
</b></pre><BR>
|
||||
<pre><b>typedef struct {
|
||||
|
@ -392,6 +392,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_srcSizeHint:
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
|
||||
return bounds;
|
||||
|
||||
default:
|
||||
{ ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
|
||||
return boundError;
|
||||
@ -448,6 +453,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
||||
case ZSTD_c_forceAttachDict:
|
||||
case ZSTD_c_literalCompressionMode:
|
||||
case ZSTD_c_targetCBlockSize:
|
||||
case ZSTD_c_srcSizeHint:
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
@ -494,6 +500,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
||||
case ZSTD_c_ldmMinMatch:
|
||||
case ZSTD_c_ldmBucketSizeLog:
|
||||
case ZSTD_c_targetCBlockSize:
|
||||
case ZSTD_c_srcSizeHint:
|
||||
break;
|
||||
|
||||
default: RETURN_ERROR(parameter_unsupported);
|
||||
@ -674,6 +681,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
||||
CCtxParams->targetCBlockSize = value;
|
||||
return CCtxParams->targetCBlockSize;
|
||||
|
||||
case ZSTD_c_srcSizeHint :
|
||||
if (value!=0) /* 0 ==> default */
|
||||
BOUNDCHECK(ZSTD_c_srcSizeHint, value);
|
||||
CCtxParams->srcSizeHint = value;
|
||||
return CCtxParams->srcSizeHint;
|
||||
|
||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||
}
|
||||
}
|
||||
@ -779,6 +792,8 @@ size_t ZSTD_CCtxParams_getParameter(
|
||||
case ZSTD_c_targetCBlockSize :
|
||||
*value = (int)CCtxParams->targetCBlockSize;
|
||||
break;
|
||||
case ZSTD_c_srcSizeHint :
|
||||
*value = (int)CCtxParams->srcSizeHint;
|
||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||
}
|
||||
return 0;
|
||||
@ -1029,7 +1044,11 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
||||
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
|
||||
{
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
|
||||
ZSTD_compressionParameters cParams;
|
||||
if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
|
||||
srcSizeHint = CCtxParams->srcSizeHint;
|
||||
}
|
||||
cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
|
||||
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
|
||||
if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
|
||||
if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
|
||||
|
@ -203,6 +203,9 @@ struct ZSTD_CCtx_params_s {
|
||||
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
|
||||
* No target when targetCBlockSize == 0.
|
||||
* There is no guarantee on compressed block size */
|
||||
size_t srcSizeHint; /* User's best guess of source size.
|
||||
* Hint is not valid when srcSizeHint == 0.
|
||||
* There is no guarantee that hint is close to actual source size */
|
||||
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
|
@ -386,6 +386,7 @@ typedef enum {
|
||||
* ZSTD_c_forceAttachDict
|
||||
* ZSTD_c_literalCompressionMode
|
||||
* ZSTD_c_targetCBlockSize
|
||||
* ZSTD_c_srcSizeHint
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
* also, the enums values themselves are unstable and can still change.
|
||||
@ -396,6 +397,7 @@ typedef enum {
|
||||
ZSTD_c_experimentalParam4=1001,
|
||||
ZSTD_c_experimentalParam5=1002,
|
||||
ZSTD_c_experimentalParam6=1003,
|
||||
ZSTD_c_experimentalParam7=1004,
|
||||
} ZSTD_cParameter;
|
||||
|
||||
typedef struct {
|
||||
@ -1063,6 +1065,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
/* Advanced parameter bounds */
|
||||
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
|
||||
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
|
||||
#define ZSTD_SRCSIZEHINT_MAX 1000000000 /* 1 GB (10^9 bytes) ; written as an integer constant, since it bounds an int parameter value */
|
||||
|
||||
/* internal */
|
||||
#define ZSTD_HASHLOG3_MAX 17
|
||||
@ -1441,6 +1444,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
|
||||
* There is no guarantee on compressed block size (default:0) */
|
||||
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
|
||||
|
||||
/* User's best guess of source size.
|
||||
* Hint is not valid when srcSizeHint == 0.
|
||||
* There is no guarantee that hint is close to actual source size */
|
||||
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
|
||||
|
||||
/*! ZSTD_CCtx_getParameter() :
|
||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||
* and store it into int* value.
|
||||
|
@ -305,6 +305,7 @@ struct FIO_prefs_s {
|
||||
int ldmBucketSizeLog;
|
||||
int ldmHashRateLog;
|
||||
size_t targetCBlockSize;
|
||||
size_t srcSizeHint;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
|
||||
/* IO preferences */
|
||||
@ -350,6 +351,7 @@ FIO_prefs_t* FIO_createPreferences(void)
|
||||
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
|
||||
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
|
||||
ret->targetCBlockSize = 0;
|
||||
ret->srcSizeHint = 0;
|
||||
ret->literalCompressionMode = ZSTD_lcm_auto;
|
||||
return ret;
|
||||
}
|
||||
@ -422,6 +424,10 @@ void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize)
|
||||
prefs->targetCBlockSize = targetCBlockSize;
|
||||
}
|
||||
|
||||
/* FIO_setSrcSizeHint() :
 * Stores `srcSizeHint` into `prefs->srcSizeHint`, replacing any previous value.
 * No validation is performed here. */
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
|
||||
prefs->srcSizeHint = srcSizeHint;
|
||||
}
|
||||
|
||||
void FIO_setLiteralCompressionMode(
|
||||
FIO_prefs_t* const prefs,
|
||||
ZSTD_literalCompressionMode_e mode) {
|
||||
@ -667,6 +673,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
|
||||
/* max compressed block size */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
|
||||
/* source size hint */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
|
||||
/* long distance matching */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
|
||||
|
@ -72,6 +72,7 @@ void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
|
||||
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
|
||||
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
|
||||
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
|
||||
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
|
||||
void FIO_setLiteralCompressionMode(
|
||||
FIO_prefs_t* const prefs,
|
||||
ZSTD_literalCompressionMode_e mode);
|
||||
|
@ -141,6 +141,7 @@ static int usage_advanced(const char* programName)
|
||||
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
|
||||
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
|
||||
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
|
||||
DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n");
|
||||
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
|
||||
@ -589,6 +590,7 @@ int main(int argCount, const char* argv[])
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
unsigned dictID = 0;
|
||||
size_t targetCBlockSize = 0;
|
||||
size_t srcSizeHint = 0;
|
||||
int dictCLevel = g_defaultDictCLevel;
|
||||
unsigned dictSelect = g_defaultSelectivityLevel;
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
@ -746,6 +748,7 @@ int main(int argCount, const char* argv[])
|
||||
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
|
||||
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--long")) {
|
||||
unsigned ldmWindowLog = 0;
|
||||
ldmFlag = 1;
|
||||
@ -1151,6 +1154,7 @@ int main(int argCount, const char* argv[])
|
||||
FIO_setAdaptMax(prefs, adaptMax);
|
||||
FIO_setRsyncable(prefs, rsyncable);
|
||||
FIO_setTargetCBlockSize(prefs, targetCBlockSize);
|
||||
FIO_setSrcSizeHint(prefs, srcSizeHint);
|
||||
FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
|
||||
if (adaptMin > cLevel) cLevel = adaptMin;
|
||||
if (adaptMax < cLevel) cLevel = adaptMax;
|
||||
@ -1160,7 +1164,7 @@ int main(int argCount, const char* argv[])
|
||||
else
|
||||
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
|
||||
#else
|
||||
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
|
||||
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
|
||||
DISPLAY("Compression not supported \n");
|
||||
#endif
|
||||
} else { /* decompression or test */
|
||||
|
@ -409,6 +409,34 @@ println "compress multiple files including a missing one (notHere) : "
|
||||
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
|
||||
|
||||
|
||||
println "\n===> size-hint mode"
|
||||
|
||||
./datagen -g11000 > tmp
|
||||
println "test : basic file compression vs streaming compression vs hinted streaming compression"
|
||||
$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst # only run for convenience of comparison
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000 2>&1 | tee stream_sized.out
|
||||
|
||||
file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
|
||||
stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
|
||||
rm file.out stream_sized.out
|
||||
|
||||
ratio_diff=$(echo $stream_sized_ratio - $file_ratio | bc)
|
||||
if [ $(echo "(100 * $ratio_diff) > 1" | bc -l) -eq 1 ]
|
||||
then
|
||||
die "hinted compression greater than 0.01% larger than file compression"
|
||||
fi
|
||||
println "test : hinted streaming compression and decompression"
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
|
||||
$ZSTD -df tmp.zst -o tmp_decompress
|
||||
cmp tmp tmp_decompress || die "difference between original and decompressed file"
|
||||
println "test : incorrect hinted stream sizes"
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11050 # slightly too high
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=10950 # slightly too low
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=22000 # considerably too high
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=5500 # considerably too low
|
||||
|
||||
|
||||
println "\n===> dictionary tests "
|
||||
|
||||
println "- test with raw dict (content only) "
|
||||
|
Loading…
Reference in New Issue
Block a user