From 59d1f797681d24fe479b1f60cf66ddf5135a6002 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 23 Jan 2016 19:28:41 +0100 Subject: [PATCH] variable litblock header size --- lib/huff0.c | 21 ++-- lib/huff0.h | 9 +- lib/huff0_static.h | 21 +++- lib/zstd.h | 4 +- lib/zstd_compress.c | 261 ++++++++++++++++++++++++++++-------------- lib/zstd_decompress.c | 156 ++++++++++++++++++------- lib/zstd_internal.h | 9 +- lib/zstd_static.h | 4 +- programs/bench.c | 12 +- 9 files changed, 339 insertions(+), 158 deletions(-) diff --git a/lib/huff0.c b/lib/huff0.c index 26a7639ff..2544c9e69 100644 --- a/lib/huff0.c +++ b/lib/huff0.c @@ -49,7 +49,6 @@ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #else # ifdef __GNUC__ -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # define FORCE_INLINE static inline __attribute__((always_inline)) # else # define FORCE_INLINE static inline @@ -512,7 +511,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize, op += errorCode; /* Compress */ - //errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable); /* single segment */ + //if (srcSize < MIN_4STREAMS) errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable); else /* single segment */ errorCode = HUF_compress_into4Segments(op, oend - op, src, srcSize, CTable); if (HUF_isError(errorCode)) return errorCode; if (errorCode==0) return 0; @@ -531,7 +530,7 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi } -/********************************************************* +/* ******************************************************* * Huff0 : Huffman block decompression *********************************************************/ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ @@ -621,9 +620,9 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, } -/**************************/ +/* ************************/ /* single-symbol decoding 
*/ -/**************************/ +/* ************************/ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) { @@ -866,9 +865,9 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS } -/***************************/ +/* *************************/ /* double-symbols decoding */ -/***************************/ +/* *************************/ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, const U32* rankValOrigin, const int minWeight, @@ -1266,9 +1265,9 @@ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS } -/**********************************/ +/* ********************************/ /* quad-symbol decoding */ -/**********************************/ +/* ********************************/ typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6; typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6; @@ -1657,9 +1656,9 @@ size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS } -/**********************************/ +/* ********************************/ /* Generic decompression selector */ -/**********************************/ +/* ********************************/ typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = diff --git a/lib/huff0.h b/lib/huff0.h index 2ebd5cfbd..fe28d7bea 100644 --- a/lib/huff0.h +++ b/lib/huff0.h @@ -1,7 +1,7 @@ /* ****************************************************************** Huff0 : Huffman coder, part of New Generation Entropy library header file - Copyright (C) 2013-2015, Yann Collet. + Copyright (C) 2013-2016, Yann Collet. 
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -30,7 +30,6 @@ You can contact the author at : - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ #ifndef HUFF0_H #define HUFF0_H @@ -66,8 +65,10 @@ HUF_compress(): HUF_decompress(): Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize', into already allocated destination buffer 'dst', of size 'dstSize'. - 'dstSize' must be the exact size of original (uncompressed) data. - Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows size to regenerate. + @dstSize : must be the **exact** size of original (uncompressed) data. + Note : in contrast with FSE, HUF_decompress can regenerate + RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + because it knows size to regenerate. 
@return : size of regenerated data (== dstSize) or an error code, which can be tested using HUF_isError() */ diff --git a/lib/huff0_static.h b/lib/huff0_static.h index 5df0727d7..28ae92ab2 100644 --- a/lib/huff0_static.h +++ b/lib/huff0_static.h @@ -1,7 +1,7 @@ /* ****************************************************************** - Huff0 : Huffman coder, part of New Generation Entropy library - header file for static linking (only) - Copyright (C) 2013-2015, Yann Collet + Huff0 : Huffman codec, part of New Generation Entropy library + header file, for static linking only + Copyright (C) 2013-2016, Yann Collet BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -30,7 +30,6 @@ You can contact the author at : - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ #ifndef HUFF0_STATIC_H #define HUFF0_STATIC_H @@ -47,15 +46,21 @@ extern "C" { /* **************************************** -* Static allocation macros +* Static allocation ******************************************/ /* Huff0 buffer bounds */ #define HUF_CTABLEBOUND 129 #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ +/* static allocation of Huff0's Compression Table */ +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + U32 name##hb[maxSymbolValue+1]; \ + void* name##hv = &(name##hb); \ + HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + /* static allocation of Huff0's DTable */ -#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1< /* For Visual 2005 */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #else -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # ifdef __GNUC__ # define FORCE_INLINE 
static inline __attribute__((always_inline)) # else @@ -64,10 +63,17 @@ /* ************************************* * Constants ***************************************/ -ZSTDLIB_API unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } static const U32 g_searchStrength = 8; +/* ************************************* +* Helper functions +***************************************/ +unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } + +size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } + + /* ************************************* * Sequence storage ***************************************/ @@ -116,7 +122,6 @@ struct ZSTD_CCtx_s size_t hbSize; char headerBuffer[ZSTD_frameHeaderSize_max]; - seqStore_t seqStore; /* sequences storage ptrs */ U32* hashTable; U32* contentTable; @@ -236,11 +241,64 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, /* ******************************************************* * Block entropic compression *********************************************************/ -size_t ZSTD_compressBound(size_t srcSize) /* maximum compressed size */ -{ - return FSE_compressBound(srcSize) + 12; -} +/* Block format description + + Block = Literal Section - Sequences Section + Prerequisite : size of (compressed) block, maximum size of regenerated data + + 1) Literal Section + + 1.1) Header : 1-5 bytes + flags: 2 bits + 00 compressed by Huff0 + 01 unused + 10 is Raw (uncompressed) + 11 is Rle + Note : using 01 => Huff0 with precomputed table ? + Note : delta map ? => compressed ? 
+ + 1.1.1) Huff0-compressed literal block : 3-5 bytes + srcSize < 1 KB => 3 bytes (2-2-10-10) + srcSize < 17KB => 4 bytes (2-2-14-14) + else => 5 bytes (2-2-18-18) + big endian convention + + 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes + size : 5 bits: (IS_RAW<<6) + (0<<4) + size + 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8) + size&255 + 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255 + + 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes + size : 5 bits: (IS_RLE<<6) + (0<<4) + size + 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8) + size&255 + 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255 + + 1.1.4) Unused + Use Huff0 w/ precalculated DTable ? + FSE ? => probably not, not efficient on literals + + 1.2) Literal block content + + 1.2.1) Huff0 block, using sizes from header + See Huff0 format + + 1.2.2) Raw content + + 1.2.3) single byte + + 1.2.4) _usingDTable variant ? + + + 2) Sequences section + TO DO +*/ size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { @@ -262,24 +320,60 @@ size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size static size_t ZSTD_noCompressLiterals (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { BYTE* const ostart = (BYTE* const)dst; + const U32 flSize = 1 + (srcSize>31) + (srcSize>4095); - if (srcSize + 3 > maxDstSize) return ERROR(dstSize_tooSmall); + if (srcSize + flSize > maxDstSize) return ERROR(dstSize_tooSmall); - MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RAW); - memcpy(ostart + 3, src, srcSize); - return srcSize + 3; + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((IS_RAW<<6) + (0<<5) + srcSize); + break; + case 2: /* 2 - 2 - 12 */ + ostart[0] = (BYTE)((IS_RAW<<6) + (2<<4) + (srcSize >> 8)); + ostart[1] = (BYTE)srcSize; + break; + default: /*note : should not be necessary : flSize is within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + ostart[0] = (BYTE)((IS_RAW<<6) + 
(3<<4) + (srcSize >> 16)); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE)srcSize; + break; + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; } static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { BYTE* const ostart = (BYTE* const)dst; + U32 flSize = 1 + (srcSize>31) + (srcSize>4095); - (void)maxDstSize; - MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RLE); /* note : maxDstSize > litHeaderSize > 4 */ - ostart[3] = *(const BYTE*)src; - return 4; + (void)maxDstSize; /* maxDstSize guaranteed to be >=4, hence large enough ? */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (0<<5) + srcSize); + break; + case 2: /* 2 - 2 - 12 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8)); + ostart[1] = (BYTE)srcSize; + break; + default: /*note : should not be necessary : flSize is necessary within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16)); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE)srcSize; + break; + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; } + size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 1; } static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, @@ -287,27 +381,41 @@ static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, { const size_t minGain = ZSTD_minGain(srcSize); BYTE* const ostart = (BYTE*)dst; - size_t hsize; - static const size_t litHeaderSize = 5; + size_t lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + size_t clitSize; - if (maxDstSize < litHeaderSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + if (maxDstSize < 4) return ERROR(dstSize_tooSmall); /* not enough space for compression */ - hsize = HUF_compress(ostart+litHeaderSize, maxDstSize-litHeaderSize, src, srcSize); + clitSize = HUF_compress(ostart+lhSize, maxDstSize-lhSize, src, srcSize); - if ((hsize==0) || (hsize >= 
srcSize - minGain)) return ZSTD_noCompressLiterals(dst, maxDstSize, src, srcSize); - if (hsize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize); + if ((clitSize==0) || (clitSize >= srcSize - minGain)) return ZSTD_noCompressLiterals(dst, maxDstSize, src, srcSize); + if (clitSize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize); /* Build header */ + switch(lhSize) { - ostart[0] = (BYTE)(srcSize << 2); /* is a block, is compressed */ - ostart[1] = (BYTE)(srcSize >> 6); - ostart[2] = (BYTE)(srcSize >>14); - ostart[2] += (BYTE)(hsize << 5); - ostart[3] = (BYTE)(hsize >> 3); - ostart[4] = (BYTE)(hsize >>11); + case 3: /* 2 - 2 - 10 - 10 */ + ostart[0] = (BYTE) (srcSize>>6) + (0<< 4); + ostart[1] = (BYTE)((srcSize<<2) + (clitSize>>8)); + ostart[2] = (BYTE)(clitSize); + break; + case 4: /* 2 - 2 - 14 - 14 */ + ostart[0] = (BYTE)(srcSize>>10) + (2<<4); + ostart[1] = (BYTE)(srcSize>> 2); + ostart[2] = (BYTE)((srcSize<<6) + (clitSize>>8)); + ostart[3] = (BYTE)(clitSize); + break; + default: /* should not be necessary, lhSize is {3,4,5} */ + case 5: /* 2 - 2 - 18 - 18 */ + ostart[0] = (BYTE)(srcSize>>14) + (3<<4); + ostart[1] = (BYTE)(srcSize>>6); + ostart[2] = (BYTE)((srcSize<<2) + (clitSize>>16)); + ostart[3] = (BYTE)(clitSize>>8); + ostart[4] = (BYTE)(clitSize); + break; } - return hsize+litHeaderSize; + return lhSize+clitSize; } @@ -754,8 +862,7 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) FORCE_INLINE -size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, - void* dst, size_t maxDstSize, +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, const void* src, size_t srcSize, const U32 mls) { @@ -848,38 +955,32 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } - - /* Finale compression stage */ - return ZSTD_compressSequences(dst, maxDstSize, - seqStorePtr, srcSize); } -size_t 
ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize) +void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) { const U32 mls = ctx->params.searchLength; switch(mls) { default: case 4 : - return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 4); + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; case 5 : - return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 5); + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; case 6 : - return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 6); + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; case 7 : - return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 7); + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; } } //FORCE_INLINE -size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - const U32 mls) +void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) { U32* hashTable = ctx->hashTable; const U32 hBits = ctx->params.hashLog; @@ -989,15 +1090,10 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } - - /* Finale compression stage */ - return ZSTD_compressSequences(dst, maxDstSize, - seqStorePtr, srcSize); } -size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, +void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { const U32 mls = ctx->params.searchLength; @@ -1005,13 +1101,13 @@ size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, { default: case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 4); + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); case 5 : - 
return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 5); + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 6); + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 7); + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); } } @@ -1415,8 +1511,8 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( * Common parser - lazy strategy *********************************/ FORCE_INLINE -size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, const void* src, size_t srcSize, +void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, const U32 searchMethod, const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); @@ -1559,36 +1655,32 @@ _storeSequence: memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } - - /* Final compression stage */ - return ZSTD_compressSequences(dst, maxDstSize, - seqStorePtr, srcSize); } -size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); } -size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); } -size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, 
size_t srcSize) +static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); } -size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); } FORCE_INLINE -size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, const void* src, size_t srcSize, +void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, const U32 searchMethod, const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); @@ -1778,36 +1870,32 @@ _storeSequence: memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } - - /* Final compression stage */ - return ZSTD_compressSequences(dst, maxDstSize, - seqStorePtr, srcSize); } -size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); } -size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); } -size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, void* 
dst, size_t maxDstSize, const void* src, size_t srcSize) +static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); } -static size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); } -typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); -static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { static const ZSTD_blockCompressor blockCompressor[2][5] = { { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2 }, @@ -1822,7 +1910,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t maxDs { ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.strategy, zc->lowLimit < zc->dictLimit); if (srcSize < MIN_CBLOCK_SIZE+3) return 0; /* don't even attempt compression below a certain srcSize */ - return blockCompressor(zc, dst, maxDstSize, src, srcSize); + blockCompressor(zc, src, srcSize); + return ZSTD_compressSequences(dst, maxDstSize, &(zc->seqStore), srcSize); } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 4a026df33..6dc6f9a7d 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -171,6 +171,49 @@ size_t 
ZSTD_freeDCtx(ZSTD_DCtx* dctx) /* ************************************************************* * Decompression section ***************************************************************/ + +/* Frame format description + Frame Header - [ Block Header - Block ] - Frame End + 1) Frame Header + - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_internal.h) + - 1 byte - Window Descriptor + 2) Block Header + - 3 bytes, starting with a 2-bit descriptor + Uncompressed, Compressed, Frame End, unused + 3) Block + See Block Format Description + 4) Frame End + - 3 bytes, compatible with Block Header +*/ + +/* Block format description + Literal Section - Sequences Section + 1) Literal Section + 1.1) Header : up to 5 bytes + flags: + 00 compressed by Huff0 + 01 unused + 10 is Raw (uncompressed) + 11 is Rle + Note : using 01 for Huff0 with precomputed table ? + Note : delta map ? => compressed ? + Note 2 : 19 bits for sizes, seems a bit larger than necessary + Note 3 : RLE blocks ? + + 1.2.1) Huff0 block, using sizes from header + See Huff0 format + + 1.2.2) Huff0 block, using precomputed DTable + _usingDTable variants + + 1.2.3) uncompressed blocks + as the name says (both 2 or 3 bytes variants) + + 2) Sequences section + TO DO +*/ + + /** ZSTD_decodeFrameHeader_Part1 * decode the 1st part of the Frame Header, which tells Frame Header size. 
* srcSize must be == ZSTD_frameHeaderSize_min @@ -231,6 +274,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp return cSize; } + static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); @@ -239,26 +283,6 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, s } -/** ZSTD_decompressLiterals - @return : nb of bytes read from src, or an error code*/ -static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr, - const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - - const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ - const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ - - if (litSize > *maxDstSizePtr) return ERROR(corruption_detected); - if (litCSize + 5 > srcSize) return ERROR(corruption_detected); - - if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected); - - *maxDstSizePtr = litSize; - return litCSize + 5; -} - - /** ZSTD_decodeLiteralsBlock @return : nb of bytes read from src (< srcSize ) */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, @@ -269,47 +293,101 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, /* any compressed block with literals segment must be at least this size */ if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); - switch(*istart & 3) + switch(istart[0]>> 6) { - /* compressed */ - case 0: + case IS_HUF: { - size_t litSize = BLOCKSIZE; - const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize); + size_t litSize, litCSize; + U32 lhSize = ((istart[0]) >> 4) & 3; + switch(lhSize) + { + case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ + /* 2 - 2 - 10 - 10 */ + lhSize=3; + litSize = ((istart[0] & 15) << 6) + 
(istart[1] >> 2); + litCSize = ((istart[1] & 3) << 8) + istart[2]; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize=4; + litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6); + litCSize = ((istart[2] & 63) << 8) + istart[3]; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize=5; + litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2); + litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4]; + break; + } + + if (HUF_isError( HUF_decompress(dctx->litBuffer, litSize, istart+lhSize, litCSize) )) + return ERROR(corruption_detected); + dctx->litPtr = dctx->litBuffer; dctx->litBufSize = BLOCKSIZE+8; dctx->litSize = litSize; - return readSize; /* works if it's an error too */ + return litCSize + lhSize; } case IS_RAW: { - const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ - if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ + size_t litSize; + U32 lhSize = ((istart[0]) >> 4) & 3; + switch(lhSize) { - if (litSize > srcSize-3) return ERROR(corruption_detected); - memcpy(dctx->litBuffer, istart, litSize); + case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ + lhSize=1; + litSize = istart[0] & 31; + break; + case 2: + litSize = ((istart[0] & 15) << 8) + istart[1]; + break; + case 3: + litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2]; + break; + } + + if (litSize > srcSize-11) /* risk of reading beyond src buffer with wildcopy */ + { + if (litSize > srcSize-litSize) return ERROR(corruption_detected); + memcpy(dctx->litBuffer, istart+lhSize, litSize); dctx->litPtr = dctx->litBuffer; dctx->litBufSize = BLOCKSIZE+8; dctx->litSize = litSize; - return litSize+3; + return litSize+lhSize; } /* direct reference into compressed stream */ - dctx->litPtr = istart+3; - dctx->litBufSize = srcSize-3; + dctx->litPtr = istart+lhSize; + dctx->litBufSize = srcSize-lhSize; dctx->litSize = litSize; - return 
litSize+3; } + return lhSize+litSize; + } case IS_RLE: { - const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ + size_t litSize; + U32 lhSize = ((istart[0]) >> 4) & 3; + switch(lhSize) + { + case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ + lhSize = 1; + litSize = istart[0] & 31; + break; + case 2: + litSize = ((istart[0] & 15) << 8) + istart[1]; + break; + case 3: + litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2]; + break; + } if (litSize > BLOCKSIZE) return ERROR(corruption_detected); - memset(dctx->litBuffer, istart[3], litSize); + memset(dctx->litBuffer, istart[lhSize], litSize); dctx->litPtr = dctx->litBuffer; dctx->litBufSize = BLOCKSIZE+8; dctx->litSize = litSize; - return 4; + return lhSize+1; } - default: - return ERROR(corruption_detected); /* forbidden nominal case */ + default: /* IS_PCH */ + return ERROR(corruption_detected); /* not yet nominal case */ } } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index cae2cb8f8..a5d20409b 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -42,6 +42,7 @@ extern "C" { ***************************************/ #include "mem.h" #include "error_private.h" +#include "zstd_static.h" /* ************************************* @@ -73,8 +74,10 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define BIT1 2 #define BIT0 1 -#define IS_RAW BIT0 -#define IS_RLE BIT1 +#define IS_HUF 0 +#define IS_PCH 1 +#define IS_RAW 2 +#define IS_RLE 3 #define MINMATCH 4 #define REPCODE_STARTVALUE 4 @@ -104,7 +107,7 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } /*! 
ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */ -static void ZSTD_wildcopy(void* dst, const void* src, size_t length) +MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length) { const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; diff --git a/lib/zstd_static.h b/lib/zstd_static.h index c60fa65c2..4ce266148 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -123,7 +123,7 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, ZSTD_parameters params); -ZSTDLIB_API size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* dict, size_t dictSize); ZSTDLIB_API size_t ZSTD_duplicateCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx); ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); @@ -169,7 +169,7 @@ ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); /** - Streaming decompression, bufferless mode + Streaming decompression, direct mode (bufferless) A ZSTD_DCtx object is required to track streaming operations. Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. 
diff --git a/programs/bench.c b/programs/bench.c index fcd674aac..fe2b07f0f 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -357,15 +357,17 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, { if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) { - U32 bn; + U32 segNb, bNb, pos; size_t bacc = 0; printf("Decoding error at pos %u ", (U32)u); - for (bn = 0; bn < nbBlocks; bn++) + for (segNb = 0; segNb < nbBlocks; segNb++) { - if (bacc + blockTable[bn].srcSize > u) break; - bacc += blockTable[bn].srcSize; + if (bacc + blockTable[segNb].srcSize > u) break; + bacc += blockTable[segNb].srcSize; } - printf("(block %u, pos %u) \n", bn, (U32)(u - bacc)); + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + printf("(segment %u, block %u, pos %u) \n", segNb, bNb, pos); break; } }