Greatly improved compression and decompression speed, at the expense of some compression ratio.

Most of the change comes from lowering the performance parameter (HASH_LOG), which is now set to 12 so that the hash table fits in the L1 cache of Intel processors.
You can set it back to 17 to restore the previous compression ratio.
AMD users are invited to try HASH_LOG = 13, since the AMD L1 cache is twice as large.

git-svn-id: https://lz4.googlecode.com/svn/trunk@10 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
yann.collet.73@gmail.com 2011-06-05 21:23:42 +00:00
parent 6b798d5e40
commit 075bf1349b
2 changed files with 35 additions and 24 deletions

42
lz4.c
View File

@ -36,13 +36,13 @@
//**************************************
// Performance parameter <---------------------------------------------------------
// Performance parameter
//**************************************
// Lowering this value reduces memory usage
// It may also improve speed, especially if you reach L1 cache size (32KB for Intel, 64KB for AMD)
// Expanding memory usage typically improves compression ratio
// Memory usage formula : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)
#define HASH_LOG 17
#define HASH_LOG 12
//**************************************
@ -125,6 +125,7 @@ int LZ4_compressCtx(void** ctx,
BYTE *ip = (BYTE*) source, /* input pointer */
*anchor = (BYTE*) source,
*incompressible = anchor + INCOMPRESSIBLE,
*iend = (BYTE*) source + isize,
*ilimit = iend - MINMATCH - 1;
@ -134,7 +135,6 @@ int LZ4_compressCtx(void** ctx,
int len, length, sequence, h;
U32 step=1;
S32 limit=INCOMPRESSIBLE;
// Init
@ -154,23 +154,23 @@ int LZ4_compressCtx(void** ctx,
ref = HashTable[h];
HashTable[h] = ip;
// Check Min Match
if (( ((ip-ref) >> MAXD_LOG) != 0) || (*(U32*)ref != sequence))
// Min Match
if (( ((ip-ref) >> MAXD_LOG)) || (*(U32*)ref != sequence))
{
if (ip-anchor>limit) { limit <<= 1; step += 1 + (step>>2); }
ip += step;
if (ip>incompressible) { incompressible += INCOMPRESSIBLE << (step >> 1); step++; }
ip+=step;
continue;
}
}
step=1;
// catch up
if (step>1) { HashTable[h] = ref; ip -= (step-1); step=1; continue; }
limit = INCOMPRESSIBLE;
// Catch up
while ((ip>anchor) && (*(ip-1)==*(ref-1))) { ip--; ref--; }
// Encode Literal length
len = length = ip - anchor;
length = ip - anchor;
orun = op++;
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
else *orun = (len<<ML_BITS);
if (length>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
else *orun = (length<<ML_BITS);
// Copy Literals
l_end = op + length;
@ -183,7 +183,16 @@ int LZ4_compressCtx(void** ctx,
// Start Counting
ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
anchor = ip;
while ((ip<iend) && (*ref == *ip)) { ip++; ref++; } // Ends at *ip!=*ref
while (ip<(iend-3))
{
if (*(U32*)ref == *(U32*)ip) { ip+=4; ref+=4; continue; }
if (*(U16*)ref == *(U16*)ip) { ip+=2; ref+=2; }
if (*ref == *ip) ip++;
goto _endCount;
}
if ((ip<(iend-1)) && (*(U16*)ref == *(U16*)ip)) { ip+=2; ref+=2; }
if ((ip<iend) && (*ref == *ip)) ip++;
_endCount:
len = (ip - anchor);
// Encode MatchLength
@ -192,6 +201,7 @@ int LZ4_compressCtx(void** ctx,
// Prepare next loop
anchor = ip;
incompressible = anchor + INCOMPRESSIBLE;
}
// Encode Last Literals
@ -222,7 +232,7 @@ int LZ4_uncompress(char* source,
BYTE *ip = (BYTE*) source;
BYTE *op = (BYTE*) dest,
*oend=(BYTE*) dest + osize,
*oend= op + osize,
*ref, *cpy,
runcode;

17
lz4.h
View File

@ -66,9 +66,9 @@ int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int m
LZ4_uncompress :
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This version never writes beyond dest + osize, and is therefore protected against malicious data packets
This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated
note 2 : destination buffer must already be allocated, with at least maxOutputSize bytes
note 3 : this version is slower by up to 10%, and is therefore not recommended for general use
*/
@ -88,20 +88,21 @@ LZ4_compressCtx :
*/
//****************************
// Deprecated Functions
//****************************
//*********************************
// Faster Decoding function
//*********************************
int LZ4_decode (char* source, char* dest, int isize);
#define LZ4_uncompress_fast LZ4_decode
int LZ4_decode (char* source, char* dest, int isize);
/*
LZ4_decode :
LZ4_decode : This version is the fastest one, besting LZ4_uncompress by a few %.
return : the number of bytes in decoded buffer dest
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated.
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer
The destination buffer size must be at least "decompressedSize + 3 Bytes"
This version is unprotected against malicious data packets designed to create buffer overflow errors.
This version is **unprotected** against malicious data packets designed to create buffer overflow errors.
It is therefore deprecated, but still present in this version for compatibility.
*/