First working version in which both the encoder and the decoder use the alternate LL + ML coding scheme.

decompression speed highly impacted
This commit is contained in:
Yann Collet 2016-03-22 23:19:28 +01:00
parent fadda6c875
commit be391438ff
5 changed files with 71 additions and 95 deletions

View File

@ -80,7 +80,6 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
ssPtr->lit = ssPtr->litStart;
ssPtr->litLength = ssPtr->litLengthStart;
ssPtr->matchLength = ssPtr->matchLengthStart;
ssPtr->dumps = ssPtr->dumpsStart;
}
@ -184,14 +183,14 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog);
const U32 divider = (params.searchLength==3) ? 3 : 4;
const size_t maxNbSeq = blockSize / divider;
const size_t tokenSpace = blockSize + 12*maxNbSeq;
const size_t tokenSpace = blockSize + 11*maxNbSeq;
const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog);
const size_t hSize = 1 << params.hashLog;
const size_t h3Size = (params.searchLength==3) ? (1 << HASHLOG3) : 0;
const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32);
/* Check if workSpace is large enough, alloc a new one if needed */
{ size_t const optSpace = ((1<<MLbits) + (MaxLL+1) + (1<<Offbits) + (1<<Litbits))*sizeof(U32)
{ size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (1<<Offbits) + (1<<Litbits))*sizeof(U32)
+ (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
+ ((params.strategy == ZSTD_btopt) ? optSpace : 0);
@ -227,12 +226,11 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq;
zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq;
zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq;
zc->seqStore.dumpsStart = zc->seqStore.litStart + maxNbSeq;
if (params.strategy == ZSTD_btopt) {
zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq));
zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.litStart + blockSize));
zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<<MLbits);
zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<<Offbits)));
zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1));
zc->seqStore.litLengthSum = 0;
@ -599,12 +597,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
size_t const nbSeq = offsetTableEnd - offsetTable;
BYTE* seqHead;
static U32 blockNb = 0;
blockNb++;
if (blockNb==79)
blockNb += !nbSeq;
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
size_t const litSize = seqStorePtr->lit - literals;
@ -620,25 +612,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) goto _check_compressibility;
/* dumps : contains rests of large lengths */
if ((oend-op) < 3 /* dumps */ + 1 /*seqHead*/)
return ERROR(dstSize_tooSmall);
seqHead = op;
{ size_t const dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
if (dumpsLength < 512) {
op[0] = (BYTE)(dumpsLength >> 8);
op[1] = (BYTE)(dumpsLength);
op += 2;
} else {
op[0] = 2;
op[1] = (BYTE)(dumpsLength>>8);
op[2] = (BYTE)(dumpsLength);
op += 3;
}
if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall);
memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
op += dumpsLength;
}
/* seqHead : flags for FSE encoding type */
seqHead = op++;
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
#define MAX_SEQ_FOR_STATIC_FSE 1000
@ -714,7 +689,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
/* ML codes */
{ static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 20, 31,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
@ -753,7 +728,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
MLtype = FSE_ENCODING_DYNAMIC;
} }
seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
zc->flagStaticTables = 0;
/* Encoding Sequences */
@ -791,6 +766,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]);
BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */
BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]);
//if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos);
BIT_flushBits(&blockStream); /* 7 */ /* 7 */
} }
@ -824,7 +800,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B
static const BYTE* g_start = NULL;
const U32 pos = (U32)(literals - g_start);
if (g_start==NULL) g_start = literals;
if ((pos > 198618400) && (pos < 198618500))
if ((pos > 10354000) && (pos < 10355000))
printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
#endif

View File

@ -559,9 +559,37 @@ FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max,
}
size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
const void* src, size_t srcSize)
/*! ZSTD_buildSeqTableML() :
 *  Prepare the match-length decoding table `DTable` for the encoding mode `type`.
 *  - FSE_ENCODING_RLE     : one byte is read from `src`; it is rejected when larger than `max`.
 *  - FSE_ENCODING_RAW     : table built from ML_defaultNorm / ML_defaultNormLog, nothing read from `src`.
 *  - FSE_ENCODING_STATIC  : `DTable` is left untouched, nothing read from `src`.
 *  - any other value      : handled as FSE_ENCODING_DYNAMIC; a header is parsed from `src`
 *                           with FSE_readNCount() and its table log must not exceed `maxLog`.
 *  @return : number of bytes consumed from `src` (1 for RLE, 0 for RAW and STATIC,
 *            the header size for DYNAMIC), or an error code
 *            (srcSize_wrong when an RLE byte is missing, corruption_detected otherwise). */
FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
                                         const void* src, size_t srcSize)
{
    if (type == FSE_ENCODING_RLE) {
        if (srcSize == 0) return ERROR(srcSize_wrong);
        {   BYTE const rleSymbol = *(const BYTE*)src;
            if (rleSymbol > max) return ERROR(corruption_detected);   /* symbol outside of the allowed range */
            FSE_buildDTable_rle(DTable, rleSymbol);
        }
        return 1;   /* exactly one byte consumed */
    }

    if (type == FSE_ENCODING_RAW) {
        FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog);
        return 0;
    }

    if (type == FSE_ENCODING_STATIC)
        return 0;   /* table is not modified */

    /* FSE_ENCODING_DYNAMIC, and any unexpected value of `type` */
    {   S16 normCount[MaxSeq+1];
        U32 tableLog;
        size_t const hSize = FSE_readNCount(normCount, &max, &tableLog, src, srcSize);
        if (FSE_isError(hSize) || (tableLog > maxLog)) return ERROR(corruption_detected);
        FSE_buildDTable(DTable, normCount, max, tableLog);
        return hSize;
    }
}
size_t ZSTD_decodeSeqHeaders(int* nbSeq,
FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
const void* src, size_t srcSize)
{
const BYTE* const istart = (const BYTE* const)src;
const BYTE* ip = istart;
@ -585,26 +613,13 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen
LLtype = *ip >> 6;
Offtype = (*ip >> 4) & 3;
MLtype = (*ip >> 2) & 3;
{ size_t dumpsLength;
if (*ip & 2) {
dumpsLength = ip[2];
dumpsLength += ip[1] << 8;
ip += 3;
} else {
dumpsLength = ip[1];
dumpsLength += (ip[0] & 1) << 8;
ip += 2;
}
*dumpsPtr = ip;
ip += dumpsLength;
*dumpsLengthPtr = dumpsLength;
}
ip++;
/* check */
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, 35, LLFSELog, ip, iend-ip);
{ size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip);
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
@ -612,7 +627,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
{ size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MLbits, MLFSELog, ip, iend-ip);
{ size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip);
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
@ -633,8 +648,6 @@ typedef struct {
FSE_DState_t stateOffb;
FSE_DState_t stateML;
size_t prevOffset;
const BYTE* dumps;
const BYTE* dumpsEnd;
} seqState_t;
@ -662,31 +675,26 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls)
if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */
seq->offset = offset;
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */
}
/* Literal length update */
{ static const U32 ML_base[MaxML+1] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
U32 const mlCode = FSE_peakSymbol(&(seqState->stateML));
seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls;
}
/* ANS update */
FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
/* MatchLength */
{ size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
const BYTE* dumps = seqState->dumps;
if (matchLength == MaxML) {
const BYTE* const de = seqState->dumpsEnd;
const U32 add = *dumps++;
if (add < 255) matchLength += add;
else {
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
if (matchLength&1) matchLength>>=1, dumps += 3;
else matchLength = (U16)(matchLength)>>1, dumps += 2;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += mls;
seq->matchLength = matchLength;
seqState->dumps = dumps;
}
FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
#if 0 /* debug */
{
@ -781,12 +789,10 @@ static size_t ZSTD_decompressSequences(
BYTE* const ostart = (BYTE* const)dst;
BYTE* op = ostart;
BYTE* const oend = ostart + maxDstSize;
size_t dumpsLength;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
const BYTE* const litEnd = litPtr + dctx->litSize;
int nbSeq;
const BYTE* dumps;
U32* DTableLL = dctx->LLTable;
U32* DTableML = dctx->MLTable;
U32* DTableOffb = dctx->OffTable;
@ -796,7 +802,7 @@ static size_t ZSTD_decompressSequences(
const U32 mls = dctx->fParams.mml;
/* Build Decoding Tables */
{ size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
{ size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq,
DTableLL, DTableML, DTableOffb,
ip, seqSize);
if (ZSTD_isError(errorCode)) return errorCode;
@ -810,8 +816,6 @@ static size_t ZSTD_decompressSequences(
memset(&sequence, 0, sizeof(sequence));
sequence.offset = REPCODE_STARTVALUE;
seqState.dumps = dumps;
seqState.dumpsEnd = dumps + dumpsLength;
seqState.prevOffset = REPCODE_STARTVALUE;
{ size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
@ -825,7 +829,7 @@ static size_t ZSTD_decompressSequences(
ZSTD_decodeSequence(&sequence, &seqState, mls);
#if 0 /* for debug */
{ U32 pos = (U32)(op-base);
if ((pos > 198618400) && (pos < 198618500))
if ((pos > 10354000) && (pos < 10355000))
printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
}
@ -867,17 +871,16 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
const void* src, size_t srcSize)
{ /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src;
size_t litCSize;
if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->params.searchLength);
/* Decode literals sub-block */
litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize;
srcSize -= litCSize;
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize;
srcSize -= litCSize; }
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
}

View File

@ -102,16 +102,15 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
#define HASHLOG3 17
#define Litbits 8
#define MLbits 7
#define Offbits 5
#define MaxLit ((1<<Litbits) - 1)
#define MaxML 52
#define MaxLL 35
#define MaxOff ((1<<Offbits)- 1)
#define MLFSELog 10
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog 9
#define LLFSELog 9
#define OffFSELog 9
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
#define FSE_ENCODING_RAW 0
#define FSE_ENCODING_RLE 1
@ -209,8 +208,6 @@ typedef struct {
U16* matchLengthStart;
U16* matchLength;
BYTE* mlCodeStart;
BYTE* dumpsStart;
BYTE* dumps;
U32 longLength;
/* opt */
ZSTD_optimal_t* priceTable;

View File

@ -56,7 +56,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
if (ssPtr->litLengthSum == 0) {
ssPtr->litSum = (2<<Litbits);
ssPtr->litLengthSum = MaxLL+1;
ssPtr->matchLengthSum = (1<<MLbits);
ssPtr->matchLengthSum = MaxML+1;
ssPtr->offCodeSum = (1<<Offbits);
ssPtr->matchSum = (2<<Litbits);

View File

@ -49,7 +49,7 @@
#include <stdio.h> /* fprintf, fopen, ftello64 */
#include <sys/types.h> /* stat64 */
#include <sys/stat.h> /* stat64 */
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
/* sleep : posix - windows - others */
#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)))
@ -65,7 +65,7 @@
#include "mem.h"
#include "zstd_static.h"
#include "xxhash.h"
#include "datagen.h" /* RDG_genBuffer */
#include "datagen.h" /* RDG_genBuffer */
/* *************************************
@ -283,7 +283,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
(double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) );
(void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */
#if 0
#if 1
/* Decompression */
memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */