Add sequence count validation to compressSequences(), compare match lengths against the cctx minMatch parameter instead of MINMATCH

This commit is contained in:
senhuang42 2020-12-01 10:53:30 -05:00
parent 4c5f337248
commit 3efe9c902b
2 changed files with 10 additions and 8 deletions

View File

@ -4680,13 +4680,13 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
U32 firstHalfMatchLength;
litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
if (matchLength > blockSize && firstHalfMatchLength >= MINMATCH) {
if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
/* Only ever split the match if it is larger than the block size */
U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
if (secondHalfMatchLength < MINMATCH) {
/* Move the endPosInSequence backward so that it creates match of MINMATCH length */
endPosInSequence -= MINMATCH - secondHalfMatchLength;
bytesAdjustment = MINMATCH - secondHalfMatchLength;
if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
/* Move the endPosInSequence backward so that it creates match of minMatch length */
endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
firstHalfMatchLength -= bytesAdjustment;
}
matchLength = firstHalfMatchLength;
@ -4886,6 +4886,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
DEBUGLOG(3, "ZSTD_compressSequences()");
assert(cctx != NULL);
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
RETURN_ERROR_IF(inSeqsSize > cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
/* Begin writing output, starting with frame header */
frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
op += frameHeaderSize;

View File

@ -1356,9 +1356,10 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se
* behavior. If ZSTD_c_validateSequences == 1 and a sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements), the function will bail out and return an error.
*
* In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log.
* - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression.
* - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset
* In addition to the two adjustable experimental params, there are other important cctx params.
* - ZSTD_c_minMatch MUST be set to a value less than or equal to the length of the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
* - ZSTD_c_compressionLevel adjusts the strength of the entropy coder accordingly, as it would in typical compression.
* - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
*
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.