edge case : compress up to end-mflimit (12 bytes)

The LZ4 block format specification states that the last match must start at a minimum distance of 12 bytes from the end of the block. However, out of an abundance of caution, the reference implementation would actually stop searching matches at 13 bytes from the end of the block. This patch fixes this small detail. The new version is now able to properly compress a limit case such as `aaaaaaaabaaa\n` as reported by Gao Xiang (@hsiangkao). Obviously, it doesn't change a lot of things. This is just one additional match candidate per block, with a maximum match length of 7 (since last 5 bytes must remain literals). With default policy, blocks are 4 MB long, so it doesn't happen too often Compressing silesia.tar at default level 1 saves 5 bytes (100930101 -> 100930096). At max level 12, it saves a grand 16 bytes (77389871 -> 77389855). The impact is a bit more visible when blocks are smaller, hence more numerous. For example, compressing silesia with blocks of 64 KB (using -12 -B4D) saves 543 bytes (77304583 -> 77304040). So the smaller the packet size, the more visible the impact. And it happens we have a ton of scenarios with little blocks using LZ4 compression ... And a useless "hooray" sidenote : the patch improves the LZ4 compression record of silesia (using -12 -B7D --no-frame-crc) by 16 bytes (77270672 -> 77270656) and the record on enwik9 by 44 bytes (371680396 -> 371680352) (previously claimed by [smallz4](http://create.stephan-brumme.com/smallz4/) ).
2024-11-24 18:33:50 +08:00 · 2018-02-24 11:47:53 -08:00 · 2018-02-24 11:47:53 -08:00 · 7173a631db
commit 7173a631db
parent 99c26729b5
3 changed files with 10 additions and 9 deletions
--- a/lib/lz4.c
+++ b/lib/lz4.c
@ -545,7 +545,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
    const ptrdiff_t dictDelta = dictEnd - (const BYTE*)source;
    const BYTE* anchor = (const BYTE*) source;
    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    const BYTE* const matchlimit = iend - LASTLITERALS;

    BYTE* op = (BYTE*) dest;
@ -594,7 +594,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

-                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);

                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
                if (dict==usingExtDict) {
@ -680,7 +681,7 @@ _next_match:
        anchor = ip;

        /* Test end of chunk */
-        if (ip > mflimit) break;
+        if (ip >= mflimitPlusOne) break;

        /* Fill table */
        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@ -425,7 +425,7 @@ static int LZ4HC_compress_hashChain (
    if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */

    /* Main Loop */
-    while (ip < mflimit) {
+    while (ip <= mflimit) {
        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
        if (ml<MINMATCH) { ip++; continue; }

@ -435,7 +435,7 @@ static int LZ4HC_compress_hashChain (
        ml0 = ml;

 _Search2:
-        if (ip+ml < mflimit)
+        if (ip+ml <= mflimit)
            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
                            ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
                            maxNbAttempts, patternAnalysis);
@ -482,7 +482,7 @@ _Search3:
        }
        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */

-        if (start2 + ml2 < mflimit)
+        if (start2 + ml2 <= mflimit)
            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
                            start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
                            maxNbAttempts, patternAnalysis);
--- a/lib/lz4opt.h
+++ b/lib/lz4opt.h
@ -127,7 +127,7 @@ static int LZ4HC_compress_optimal (

    /* Main Loop */
    assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
-    while (ip < mflimit) {
+    while (ip <= mflimit) {
        int const llen = (int)(ip - anchor);
        int best_mlen, best_off;
        int cur, last_match_pos = 0;
@ -186,7 +186,7 @@ static int LZ4HC_compress_optimal (
            const BYTE* const curPtr = ip + cur;
            LZ4HC_match_t newMatch;

-            if (curPtr >= mflimit) break;
+            if (curPtr > mflimit) break;
            DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
                    cur, opt[cur].price, opt[cur+1].price, cur+1);
            if (fullUpdate) {
@ -314,7 +314,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
                if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
                    goto _dest_overflow;
        }   }
-    }  /* while (ip < mflimit) */
+    }  /* while (ip <= mflimit) */

 _last_literals:
    /* Encode Last Literals */