mirror of
https://github.com/facebook/zstd.git
synced 2024-11-23 20:46:47 +08:00
Merge pull request #4180 from facebook/split_param
Block splitter control parameter
This commit is contained in:
commit
15c29168b7
@ -323,7 +323,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
||||
assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
|
||||
assert(cctxParams.ldmParams.hashRateLog < 32);
|
||||
}
|
||||
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
|
||||
cctxParams.postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.postBlockSplitter, &cParams);
|
||||
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
|
||||
cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
|
||||
cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
|
||||
@ -391,13 +391,13 @@ ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
|
||||
*/
|
||||
cctxParams->compressionLevel = compressionLevel;
|
||||
cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams);
|
||||
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams);
|
||||
cctxParams->postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->postBlockSplitter, ¶ms->cParams);
|
||||
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams);
|
||||
cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
|
||||
cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
|
||||
cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
|
||||
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
|
||||
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
|
||||
cctxParams->useRowMatchFinder, cctxParams->postBlockSplitter, cctxParams->ldmParams.enableLdm);
|
||||
}
|
||||
|
||||
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
|
||||
@ -598,11 +598,16 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
bounds.upperBound = 1;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_useBlockSplitter:
|
||||
case ZSTD_c_splitAfterSequences:
|
||||
bounds.lowerBound = (int)ZSTD_ps_auto;
|
||||
bounds.upperBound = (int)ZSTD_ps_disable;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_blockSplitterLevel:
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = ZSTD_BLOCKSPLITTER_LEVEL_MAX;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_useRowMatchFinder:
|
||||
bounds.lowerBound = (int)ZSTD_ps_auto;
|
||||
bounds.upperBound = (int)ZSTD_ps_disable;
|
||||
@ -669,6 +674,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
||||
case ZSTD_c_minMatch:
|
||||
case ZSTD_c_targetLength:
|
||||
case ZSTD_c_strategy:
|
||||
case ZSTD_c_blockSplitterLevel:
|
||||
return 1;
|
||||
|
||||
case ZSTD_c_format:
|
||||
@ -695,7 +701,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
||||
case ZSTD_c_stableOutBuffer:
|
||||
case ZSTD_c_blockDelimiters:
|
||||
case ZSTD_c_validateSequences:
|
||||
case ZSTD_c_useBlockSplitter:
|
||||
case ZSTD_c_splitAfterSequences:
|
||||
case ZSTD_c_useRowMatchFinder:
|
||||
case ZSTD_c_deterministicRefPrefix:
|
||||
case ZSTD_c_prefetchCDictTables:
|
||||
@ -754,7 +760,8 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
||||
case ZSTD_c_stableOutBuffer:
|
||||
case ZSTD_c_blockDelimiters:
|
||||
case ZSTD_c_validateSequences:
|
||||
case ZSTD_c_useBlockSplitter:
|
||||
case ZSTD_c_splitAfterSequences:
|
||||
case ZSTD_c_blockSplitterLevel:
|
||||
case ZSTD_c_useRowMatchFinder:
|
||||
case ZSTD_c_deterministicRefPrefix:
|
||||
case ZSTD_c_prefetchCDictTables:
|
||||
@ -975,10 +982,15 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
||||
CCtxParams->validateSequences = value;
|
||||
return (size_t)CCtxParams->validateSequences;
|
||||
|
||||
case ZSTD_c_useBlockSplitter:
|
||||
BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
|
||||
CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
|
||||
return CCtxParams->useBlockSplitter;
|
||||
case ZSTD_c_splitAfterSequences:
|
||||
BOUNDCHECK(ZSTD_c_splitAfterSequences, value);
|
||||
CCtxParams->postBlockSplitter = (ZSTD_paramSwitch_e)value;
|
||||
return CCtxParams->postBlockSplitter;
|
||||
|
||||
case ZSTD_c_blockSplitterLevel:
|
||||
BOUNDCHECK(ZSTD_c_blockSplitterLevel, value);
|
||||
CCtxParams->preBlockSplitter_level = value;
|
||||
return (size_t)CCtxParams->preBlockSplitter_level;
|
||||
|
||||
case ZSTD_c_useRowMatchFinder:
|
||||
BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
|
||||
@ -1135,8 +1147,11 @@ size_t ZSTD_CCtxParams_getParameter(
|
||||
case ZSTD_c_validateSequences :
|
||||
*value = (int)CCtxParams->validateSequences;
|
||||
break;
|
||||
case ZSTD_c_useBlockSplitter :
|
||||
*value = (int)CCtxParams->useBlockSplitter;
|
||||
case ZSTD_c_splitAfterSequences :
|
||||
*value = (int)CCtxParams->postBlockSplitter;
|
||||
break;
|
||||
case ZSTD_c_blockSplitterLevel :
|
||||
*value = CCtxParams->preBlockSplitter_level;
|
||||
break;
|
||||
case ZSTD_c_useRowMatchFinder :
|
||||
*value = (int)CCtxParams->useRowMatchFinder;
|
||||
@ -2099,7 +2114,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
{
|
||||
ZSTD_cwksp* const ws = &zc->workspace;
|
||||
DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
|
||||
(U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
|
||||
(U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->postBlockSplitter);
|
||||
assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
|
||||
|
||||
zc->isFirstBlock = 1;
|
||||
@ -2111,7 +2126,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
params = &zc->appliedParams;
|
||||
|
||||
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
||||
assert(params->useBlockSplitter != ZSTD_ps_auto);
|
||||
assert(params->postBlockSplitter != ZSTD_ps_auto);
|
||||
assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
|
||||
assert(params->maxBlockSize != 0);
|
||||
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
||||
@ -2517,10 +2532,10 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
||||
/* Copy only compression parameters related to tables. */
|
||||
params.cParams = srcCCtx->appliedParams.cParams;
|
||||
assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
|
||||
assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
|
||||
assert(srcCCtx->appliedParams.postBlockSplitter != ZSTD_ps_auto);
|
||||
assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
|
||||
params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
|
||||
params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
|
||||
params.postBlockSplitter = srcCCtx->appliedParams.postBlockSplitter;
|
||||
params.ldmParams = srcCCtx->appliedParams.ldmParams;
|
||||
params.fParams = fParams;
|
||||
params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;
|
||||
@ -2728,9 +2743,9 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
|
||||
* Returns 1 if true, 0 otherwise. */
|
||||
/* ZSTD_blockSplitterEnabled() :
 * Tells whether the post-sequence block splitter is switched on.
 * @cctxParams : parameters whose @postBlockSplitter mode must already be
 *               resolved (i.e. not ZSTD_ps_auto); this is asserted.
 * @return : 1 when @postBlockSplitter == ZSTD_ps_enable, 0 otherwise.
 * Note : only the @postBlockSplitter field is consulted; the former
 *        @useBlockSplitter statements were unreachable leftovers and are dropped. */
static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
{
    DEBUGLOG(5, "ZSTD_blockSplitterEnabled (postBlockSplitter=%d)", cctxParams->postBlockSplitter);
    assert(cctxParams->postBlockSplitter != ZSTD_ps_auto);
    return (cctxParams->postBlockSplitter == ZSTD_ps_enable);
}
|
||||
|
||||
/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
|
||||
@ -4300,7 +4315,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
||||
U32 nbSeq;
|
||||
size_t cSize;
|
||||
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
|
||||
assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);
|
||||
assert(zc->appliedParams.postBlockSplitter == ZSTD_ps_enable);
|
||||
|
||||
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
|
||||
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
|
||||
@ -4491,7 +4506,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
|
||||
|
||||
#include "zstd_preSplit.h"
|
||||
|
||||
static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
|
||||
static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, int splitLevel, ZSTD_strategy strat, S64 savings)
|
||||
{
|
||||
/* split level based on compression strategy, from `fast` to `btultra2` */
|
||||
static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 };
|
||||
@ -4505,10 +4520,22 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
|
||||
* require verified savings to allow pre-splitting.
|
||||
* Note: as a consequence, the first full block is not split.
|
||||
*/
|
||||
if (savings < 3) return 128 KB;
|
||||
/* dynamic splitting has a cpu cost for analysis,
|
||||
* select a variant among multiple gradual speed/accuracy tradeoffs */
|
||||
return ZSTD_splitBlock(src, blockSizeMax, splitLevels[strat], cctx->tmpWorkspace, cctx->tmpWkspSize);
|
||||
if (savings < 3) {
|
||||
DEBUGLOG(6, "don't attempt splitting: savings (%i) too low", (int)savings);
|
||||
return 128 KB;
|
||||
}
|
||||
/* apply @splitLevel, or use default value (which depends on @strat).
|
||||
* note that splitting heuristic is still conditioned by @savings >= 3,
|
||||
* so the first block will not reach this code path */
|
||||
if (splitLevel == 1) return 128 KB;
|
||||
if (splitLevel == 0) {
|
||||
assert(ZSTD_fast <= strat && strat <= ZSTD_btultra2);
|
||||
splitLevel = splitLevels[strat];
|
||||
} else {
|
||||
assert(2 <= splitLevel && splitLevel <= 6);
|
||||
splitLevel -= 2;
|
||||
}
|
||||
return ZSTD_splitBlock(src, blockSizeMax, splitLevel, cctx->tmpWorkspace, cctx->tmpWkspSize);
|
||||
}
|
||||
|
||||
/*! ZSTD_compress_frameChunk() :
|
||||
@ -4539,7 +4566,12 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
||||
|
||||
while (remaining) {
|
||||
ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
|
||||
size_t const blockSize = ZSTD_optimalBlockSize(cctx, ip, remaining, blockSizeMax, cctx->appliedParams.cParams.strategy, savings);
|
||||
size_t const blockSize = ZSTD_optimalBlockSize(cctx,
|
||||
ip, remaining,
|
||||
blockSizeMax,
|
||||
cctx->appliedParams.preBlockSplitter_level,
|
||||
cctx->appliedParams.cParams.strategy,
|
||||
savings);
|
||||
U32 const lastBlock = lastFrameChunk & (blockSize == remaining);
|
||||
assert(blockSize <= remaining);
|
||||
|
||||
@ -6286,7 +6318,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
||||
dictSize, mode);
|
||||
}
|
||||
|
||||
params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams);
|
||||
params.postBlockSplitter = ZSTD_resolveBlockSplitterMode(params.postBlockSplitter, ¶ms.cParams);
|
||||
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams);
|
||||
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams);
|
||||
params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
|
||||
|
@ -343,8 +343,21 @@ struct ZSTD_CCtx_params_s {
|
||||
ZSTD_sequenceFormat_e blockDelimiters;
|
||||
int validateSequences;
|
||||
|
||||
/* Block splitting */
|
||||
ZSTD_paramSwitch_e useBlockSplitter;
|
||||
/* Block splitting
|
||||
* @postBlockSplitter executes split analysis after sequences are produced,
|
||||
* it's more accurate but consumes more resources.
|
||||
* @preBlockSplitter_level splits before knowing sequences,
|
||||
* it's more approximative but also cheaper.
|
||||
* Valid @preBlockSplitter_level values range from 0 to 6 (included).
|
||||
* 0 means auto, 1 means do not split,
|
||||
* then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest).
|
||||
* Highest @preBlockSplitter_level combines well with @postBlockSplitter.
|
||||
*/
|
||||
ZSTD_paramSwitch_e postBlockSplitter;
|
||||
int preBlockSplitter_level;
|
||||
|
||||
/* Adjust the max block size*/
|
||||
size_t maxBlockSize;
|
||||
|
||||
/* Param for deciding whether to use row-based matchfinder */
|
||||
ZSTD_paramSwitch_e useRowMatchFinder;
|
||||
@ -368,9 +381,6 @@ struct ZSTD_CCtx_params_s {
|
||||
void* extSeqProdState;
|
||||
ZSTD_sequenceProducer_F extSeqProdFunc;
|
||||
|
||||
/* Adjust the max block size*/
|
||||
size_t maxBlockSize;
|
||||
|
||||
/* Controls repcode search in external sequence parsing */
|
||||
ZSTD_paramSwitch_e searchForExternalRepcodes;
|
||||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
||||
|
@ -229,6 +229,7 @@ size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
|
||||
int level,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
|
||||
assert(0<=level && level<=4);
|
||||
if (level == 0)
|
||||
return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
|
||||
|
38
lib/zstd.h
38
lib/zstd.h
@ -491,7 +491,8 @@ typedef enum {
|
||||
* ZSTD_c_stableOutBuffer
|
||||
* ZSTD_c_blockDelimiters
|
||||
* ZSTD_c_validateSequences
|
||||
* ZSTD_c_useBlockSplitter
|
||||
* ZSTD_c_blockSplitterLevel
|
||||
* ZSTD_c_splitAfterSequences
|
||||
* ZSTD_c_useRowMatchFinder
|
||||
* ZSTD_c_prefetchCDictTables
|
||||
* ZSTD_c_enableSeqProducerFallback
|
||||
@ -518,7 +519,8 @@ typedef enum {
|
||||
ZSTD_c_experimentalParam16=1013,
|
||||
ZSTD_c_experimentalParam17=1014,
|
||||
ZSTD_c_experimentalParam18=1015,
|
||||
ZSTD_c_experimentalParam19=1016
|
||||
ZSTD_c_experimentalParam19=1016,
|
||||
ZSTD_c_experimentalParam20=1017
|
||||
} ZSTD_cParameter;
|
||||
|
||||
typedef struct {
|
||||
@ -2148,8 +2150,32 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
||||
*/
|
||||
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
|
||||
|
||||
/* ZSTD_c_useBlockSplitter
|
||||
* Controlled with ZSTD_paramSwitch_e enum.
|
||||
/* ZSTD_c_blockSplitterLevel
|
||||
* note: this parameter only influences the first splitter stage,
|
||||
* which is active before producing the sequences.
|
||||
* ZSTD_c_splitAfterSequences controls the next splitter stage,
|
||||
* which is active after sequence production.
|
||||
* Note that both can be combined.
|
||||
* Allowed values are between 0 and ZSTD_BLOCKSPLITTER_LEVEL_MAX included.
|
||||
* 0 means "auto", which will select a value depending on current ZSTD_c_strategy.
|
||||
* 1 means no splitting.
|
||||
* Then, values from 2 to 6 are sorted in increasing cpu load order.
|
||||
*
|
||||
* Note that currently the first block is never split,
|
||||
* to ensure expansion guarantees in presence of incompressible data.
|
||||
*/
|
||||
#define ZSTD_BLOCKSPLITTER_LEVEL_MAX 6
|
||||
#define ZSTD_c_blockSplitterLevel ZSTD_c_experimentalParam20
|
||||
|
||||
/* ZSTD_c_splitAfterSequences
|
||||
* This is a stronger splitter algorithm,
|
||||
* based on actual sequences previously produced by the selected parser.
|
||||
* It's also slower, and as a consequence, mostly used for high compression levels.
|
||||
* While the post-splitter does overlap with the pre-splitter,
|
||||
* both can nonetheless be combined,
|
||||
* notably with ZSTD_c_blockSplitterLevel at ZSTD_BLOCKSPLITTER_LEVEL_MAX,
|
||||
* resulting in higher compression ratio than just one of them.
|
||||
*
|
||||
* Default is ZSTD_ps_auto.
|
||||
* Set to ZSTD_ps_disable to never use block splitter.
|
||||
* Set to ZSTD_ps_enable to always use block splitter.
|
||||
@ -2157,7 +2183,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
||||
* By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
|
||||
* block splitting based on the compression parameters.
|
||||
*/
|
||||
#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13
|
||||
#define ZSTD_c_splitAfterSequences ZSTD_c_experimentalParam13
|
||||
|
||||
/* ZSTD_c_useRowMatchFinder
|
||||
* Controlled with ZSTD_paramSwitch_e enum.
|
||||
@ -2236,7 +2262,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
||||
* that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
|
||||
* bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
|
||||
* compressBound() inaccurate). Only currently meant to be used for testing.
|
||||
*
|
||||
*/
|
||||
#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
|
||||
|
||||
@ -2264,6 +2289,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
||||
*/
|
||||
#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19
|
||||
|
||||
|
||||
/*! ZSTD_CCtx_getParameter() :
|
||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||
* and store it into int* value.
|
||||
|
@ -140,7 +140,8 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
|
||||
setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
|
||||
setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
|
||||
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
|
||||
setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
|
||||
setRand(cctx, ZSTD_c_blockSplitterLevel, 0, ZSTD_BLOCKSPLITTER_LEVEL_MAX, producer);
|
||||
setRand(cctx, ZSTD_c_splitAfterSequences, 0, 2, producer);
|
||||
setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
|
||||
setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
|
||||
setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
|
||||
|
@ -559,6 +559,66 @@ static void test_setCParams(unsigned tnb)
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
}
|
||||
|
||||
static void test_blockSplitter_incompressibleExpansionProtection(unsigned testNb, unsigned seed)
|
||||
{
|
||||
DISPLAYLEVEL(3, "test%3i : Check block splitter doesn't oversplit incompressible data (seed %u): ", testNb, seed);
|
||||
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||
size_t const srcSize = 256 * 1024; /* needs to be at least 2 blocks */
|
||||
void* incompressible = malloc(srcSize);
|
||||
size_t const dstCapacity = ZSTD_compressBound(srcSize);
|
||||
void* cBuffer = malloc(dstCapacity);
|
||||
size_t const chunkSize = 8 KB;
|
||||
size_t const nbChunks = srcSize / chunkSize;
|
||||
size_t chunkNb, cSizeNoSplit, cSizeWithSplit;
|
||||
assert(cctx != NULL);
|
||||
assert(incompressible != NULL);
|
||||
assert(cBuffer != NULL);
|
||||
|
||||
/* let's fill input with random noise (incompressible) */
|
||||
RDG_genBuffer(incompressible, srcSize, 0.0, 0.0, seed);
|
||||
|
||||
/* this pattern targets the fastest _byChunk variant's sampling (level 3).
|
||||
* manually checked that, without the @savings protection, it would over-split.
|
||||
*/
|
||||
for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
|
||||
BYTE* const p = (BYTE*)incompressible + chunkNb * chunkSize;
|
||||
size_t const samplingRate = 43;
|
||||
int addOrRemove = chunkNb % 2;
|
||||
size_t n;
|
||||
for (n=0; n<chunkSize; n+=samplingRate) {
|
||||
if (addOrRemove) {
|
||||
p[n] &= 0x80;
|
||||
} else {
|
||||
p[n] |= 0x80;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* run first without splitting */
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockSplitterLevel, 1 /* no split */);
|
||||
cSizeNoSplit = ZSTD_compress2(cctx, cBuffer, dstCapacity, incompressible, srcSize);
|
||||
|
||||
/* run with sample43 splitter, check it's still the same */
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockSplitterLevel, 3 /* sample43, fastest _byChunk variant */);
|
||||
cSizeWithSplit = ZSTD_compress2(cctx, cBuffer, dstCapacity, incompressible, srcSize);
|
||||
|
||||
if (cSizeWithSplit != cSizeNoSplit) {
|
||||
DISPLAYLEVEL(1, "invalid compressed size: cSizeWithSplit %u != %u cSizeNoSplit \n",
|
||||
(unsigned)cSizeWithSplit, (unsigned)cSizeNoSplit);
|
||||
abort();
|
||||
}
|
||||
DISPLAYLEVEL(4, "compressed size: cSizeWithSplit %u == %u cSizeNoSplit : ",
|
||||
(unsigned)cSizeWithSplit, (unsigned)cSizeNoSplit);
|
||||
|
||||
free(incompressible);
|
||||
free(cBuffer);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
}
|
||||
|
||||
/* ============================================================= */
|
||||
|
||||
static int basicUnitTests(U32 const seed, double compressibility)
|
||||
{
|
||||
size_t const CNBuffSize = 5 MB;
|
||||
@ -1360,7 +1420,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, 7));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_useBlockSplitter, ZSTD_ps_enable));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitAfterSequences, ZSTD_ps_enable));
|
||||
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, data, srcSize);
|
||||
CHECK_Z(cSize);
|
||||
@ -1374,6 +1434,8 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
test_blockSplitter_incompressibleExpansionProtection(testNb++, seed);
|
||||
|
||||
DISPLAYLEVEL(3, "test%3d : superblock uncompressible data: too many nocompress superblocks : ", testNb++);
|
||||
{
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
@ -1675,8 +1737,8 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
int value;
|
||||
ZSTD_compressionParameters cparams = ZSTD_getCParams(1, 0, 0);
|
||||
cparams.strategy = -1;
|
||||
/* Set invalid cParams == no change. */
|
||||
cparams.strategy = (ZSTD_strategy)-1; /* set invalid value, on purpose */
|
||||
/* Set invalid cParams == error out, and no change. */
|
||||
CHECK(ZSTD_isError(ZSTD_CCtx_setCParams(cctx, cparams)));
|
||||
|
||||
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowLog, &value));
|
||||
@ -1739,12 +1801,12 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
ZSTD_freeCCtx(cctx);
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(3, "test%3d : ZSTD_CCtx_setCarams() : ", testNb++);
|
||||
DISPLAYLEVEL(3, "test%3d : ZSTD_CCtx_setParams() : ", testNb++);
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
int value;
|
||||
ZSTD_parameters params = ZSTD_getParams(1, 0, 0);
|
||||
params.cParams.strategy = -1;
|
||||
/* Set invalid params == no change. */
|
||||
params.cParams.strategy = (ZSTD_strategy)-1; /* set invalid value, on purpose */
|
||||
/* Set invalid params == error out, and no change. */
|
||||
CHECK(ZSTD_isError(ZSTD_CCtx_setParams(cctx, params)));
|
||||
|
||||
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowLog, &value));
|
||||
@ -2190,7 +2252,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : compress with block splitting : ", testNb++)
|
||||
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_useBlockSplitter, ZSTD_ps_enable) );
|
||||
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitAfterSequences, ZSTD_ps_enable) );
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
|
||||
CHECK_Z(cSize);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
|
Loading…
Reference in New Issue
Block a user