ZSTDMT can now work in non-blocking mode with 1 thread

it still fallbacks to single-thread blocking invocation
when input is small (<1job)
or when invoking ZSTDMT_compress(), which is blocking.

Also : fixed a bug in new block-granular compression routine.
This commit is contained in:
Yann Collet 2018-01-16 15:28:43 -08:00
parent 8e83c5c910
commit 2e23333094
5 changed files with 57 additions and 38 deletions

View File

@ -1962,7 +1962,8 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
{
size_t fhSize = 0;
DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u", cctx->stage);
DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
cctx->stage, (U32)srcSize);
if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
if (frame && (cctx->stage==ZSTDcs_init)) {

View File

@ -365,11 +365,12 @@ void ZSTDMT_compressChunk(void* jobDescription)
if (!job->firstChunk) { /* flush and overwrite frame header when it's not first job */
size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
DEBUGLOG(5, "ZSTDMT_compressChunk: flush and overwrite %u bytes of frame header (not first chunk)", (U32)hSize);
ZSTD_invalidateRepCodes(cctx);
}
/* compress */
#if 1
#if 0
job->cSize = (job->lastChunk) ?
ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
@ -382,26 +383,28 @@ void ZSTDMT_compressChunk(void* jobDescription)
BYTE* op = ostart;
BYTE* oend = op + dstBuff.size;
int blockNb;
DEBUGLOG(5, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks);
job->cSize = 0;
for (blockNb = 0; blockNb < nbBlocks-1; blockNb++) {
for (blockNb = 1; blockNb < nbBlocks; blockNb++) {
size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, ZSTD_BLOCKSIZE_MAX);
if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
ip += ZSTD_BLOCKSIZE_MAX;
op += cSize; assert(op < oend);
/* stats */
job->cSize += cSize;
job->readSize = ZSTD_BLOCKSIZE_MAX * (blockNb+1);
job->readSize = ZSTD_BLOCKSIZE_MAX * blockNb;
}
/* last block */
{ size_t const lastBlockSize1 = job->srcSize & (ZSTD_BLOCKSIZE_MAX-1);
size_t const lastBlockSize = (lastBlockSize1==0) ? ZSTD_BLOCKSIZE_MAX : lastBlockSize1;
if ((nbBlocks > 0) | job->lastChunk /*need to output a "last block" flag*/ ) {
size_t const lastBlockSize1 = job->srcSize & (ZSTD_BLOCKSIZE_MAX-1);
size_t const lastBlockSize = ((lastBlockSize1==0) & (job->srcSize>=ZSTD_BLOCKSIZE_MAX)) ? ZSTD_BLOCKSIZE_MAX : lastBlockSize1;
size_t const cSize = (job->lastChunk) ?
ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
/* stats */
job->cSize += cSize;
job->readSize = ZSTD_BLOCKSIZE_MAX * (blockNb+1);
job->readSize = job->srcSize;
}
}
#endif
@ -443,7 +446,7 @@ struct ZSTDMT_CCtx_s {
size_t targetDictSize;
inBuff_t inBuff;
XXH64_state_t xxhState;
unsigned singleThreaded;
unsigned singleBlockingThread;
unsigned jobIDMask;
unsigned doneJobID;
unsigned nextJobID;
@ -457,7 +460,7 @@ struct ZSTDMT_CCtx_s {
/* Sets parameters relevant to the compression job, initializing others to
* default values. Notably, nbThreads should probably be zero. */
static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
{
ZSTD_CCtx_params jobParams;
memset(&jobParams, 0, sizeof(jobParams));
@ -646,17 +649,21 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
size_t const chunkMaxSize = chunkSizeTarget << 2;
size_t const passSizeMax = chunkMaxSize * nbThreads;
unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
unsigned const nbChunksLarge = multiplier * nbThreads;
unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
}
static unsigned ZSTDMT_computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
assert(nbThreads>0);
{ size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
size_t const chunkMaxSize = chunkSizeTarget << 2;
size_t const passSizeMax = chunkMaxSize * nbThreads;
unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
unsigned const nbChunksLarge = multiplier * nbThreads;
unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
} }
/* ZSTDMT_compress_advanced_internal() :
* This is a blocking function : it will only give back control to caller after finishing its compression job.
*/
static size_t ZSTDMT_compress_advanced_internal(
ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
@ -664,10 +671,10 @@ static size_t ZSTDMT_compress_advanced_internal(
const ZSTD_CDict* cdict,
ZSTD_CCtx_params const params)
{
ZSTD_CCtx_params const jobParams = ZSTDMT_makeJobCCtxParams(params);
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
unsigned nbChunks = ZSTDMT_computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
const char* const srcStart = (const char*)src;
@ -678,14 +685,16 @@ static size_t ZSTDMT_compress_advanced_internal(
assert(jobParams.nbThreads == 0);
assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize);
if (nbChunks==1) { /* fallback to single-thread mode */
if ((nbChunks==1) | (params.nbThreads<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
}
assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
XXH64_reset(&xxh64, 0);
@ -695,6 +704,7 @@ static size_t ZSTDMT_compress_advanced_internal(
mtctx->jobIDMask = 0;
mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
if (mtctx->jobs==NULL) return ERROR(memory_allocation);
assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */
mtctx->jobIDMask = nbJobs - 1;
}
@ -827,10 +837,10 @@ size_t ZSTDMT_initCStream_internal(
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
assert(zcs->cctxPool->totalCCtx == params.nbThreads);
zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
zcs->singleBlockingThread = pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN; /* do not trigger multi-threading when srcSize is too small */
if (zcs->singleThreaded) {
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params);
if (zcs->singleBlockingThread) {
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
DEBUGLOG(4, "single thread mode");
assert(singleThreadParams.nbThreads == 0);
return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
@ -921,10 +931,11 @@ size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
}
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
ZSTD_CCtx_params cctxParams = zcs->params;
cctxParams.cParams = params.cParams;
cctxParams.fParams = params.fParams;
DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
}
@ -1071,7 +1082,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
assert(output->pos <= output->size);
assert(input->pos <= input->size);
if (mtctx->singleThreaded) { /* delegate to single-thread (synchronous) */
if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
}
@ -1166,7 +1177,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
{
DEBUGLOG(5, "ZSTDMT_flushStream");
if (mtctx->singleThreaded)
if (mtctx->singleBlockingThread)
return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */);
}
@ -1174,7 +1185,7 @@ size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
{
DEBUGLOG(4, "ZSTDMT_endStream");
if (mtctx->singleThreaded)
if (mtctx->singleBlockingThread)
return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */);
}

View File

@ -85,7 +85,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
typedef enum {
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
} ZSTDMT_parameter;
/* ZSTDMT_setMTCtxParameter() :

View File

@ -1759,7 +1759,7 @@ static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skip
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_decompressContinue");
DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
/* Sanity check */
if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
@ -1820,12 +1820,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decompressLastBlock:
case ZSTDds_decompressBlock:
DEBUGLOG(5, "case ZSTDds_decompressBlock");
DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
{ size_t rSize;
switch(dctx->bType)
{
case bt_compressed:
DEBUGLOG(5, "case bt_compressed");
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
break;
case bt_raw :
@ -1839,12 +1839,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
return ERROR(corruption_detected);
}
if (ZSTD_isError(rSize)) return rSize;
DEBUGLOG(5, "decoded size from block : %u", (U32)rSize);
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
dctx->decodedSize += rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
DEBUGLOG(4, "decoded size from frame : %u", (U32)dctx->decodedSize);
DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
if (dctx->decodedSize != dctx->fParams.frameContentSize) {
return ERROR(corruption_detected);
@ -1868,7 +1868,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
assert(srcSize == 4); /* guaranteed by dctx->expected */
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
U32 const check32 = MEM_readLE32(src);
DEBUGLOG(4, "checksum : calculated %08X :: %08X read", h32, check32);
DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
if (check32 != h32) return ERROR(checksum_wrong);
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;

View File

@ -1243,6 +1243,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2);
if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1;
{ int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1;
DISPLAYLEVEL(5, "Init with compression level = %i \n", compressionLevel);
CHECK_Z( ZSTDMT_initCStream(zc, compressionLevel) );
}
} else {
@ -1301,9 +1302,12 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
if ((FUZ_rand(&lseed) & 15) == 0) {
size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
size_t const previousPos = outBuff.pos;
outBuff.size = outBuff.pos + adjustedDstSize;
DISPLAYLEVEL(5, "Flushing into dst buffer of size %u \n", (U32)adjustedDstSize);
CHECK_Z( ZSTDMT_flushStream(zc, &outBuff) );
assert(outBuff.pos >= previousPos);
DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_flushStream \n", (U32)(outBuff.pos-previousPos));
} }
/* final frame epilogue */
@ -1311,10 +1315,13 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
while (remainingToFlush) {
size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
size_t const previousPos = outBuff.pos;
outBuff.size = outBuff.pos + adjustedDstSize;
DISPLAYLEVEL(5, "Ending into dst buffer of size %u \n", (U32)adjustedDstSize);
remainingToFlush = ZSTDMT_endStream(zc, &outBuff);
CHECK (ZSTD_isError(remainingToFlush), "ZSTDMT_endStream error : %s", ZSTD_getErrorName(remainingToFlush));
assert(outBuff.pos >= previousPos);
DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_endStream \n", (U32)(outBuff.pos-previousPos));
DISPLAYLEVEL(5, "endStream : remainingToFlush : %u \n", (U32)remainingToFlush);
} }
crcOrig = XXH64_digest(&xxhState);