From 27efcd4d4582add02884698ddca3c2cb96a281c5 Mon Sep 17 00:00:00 2001
From: "yann.collet.73@gmail.com"
 <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>
Date: Mon, 12 Aug 2013 08:35:52 +0000
Subject: [PATCH] Removed dependency to "lz4_encoder.h" and "lz4hc_encoder.h"
 Improved speed of LZ4_decompress_fast() with GCC Improved speed of
 LZ4_decompress_safe() for 32-bits Made the fast LZ4 compression compatible
 with low-memory systems (buffer address < 64K). Thanks Francois Gretief for
 report and suggestion. Makefile : added fuzzer32 Makefile : added fullbench32
 fullbench : added ability to select one specific function to benchmark lz4.c
 : copy macros follow memcpy() arguments convention Small coding style
 modifications, hinted by cppCheck.

git-svn-id: https://lz4.googlecode.com/svn/trunk@101 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
---
 Makefile        |  10 +-
 bench.c         | 108 ++++++------
 fullbench.c     | 109 +++++++-----
 fuzzer.c        |  41 +++--
 lz4.c           | 436 ++++++++++++++++++++++++++----------------------
 lz4.h           |   2 +-
 lz4_encoder.h   | 262 -----------------------------
 lz4c.c          |  21 +--
 lz4hc.c         | 314 +++++++++++++++++++++++++++++-----
 lz4hc_encoder.h | 349 --------------------------------------
 10 files changed, 680 insertions(+), 972 deletions(-)
 delete mode 100644 lz4_encoder.h
 delete mode 100644 lz4hc_encoder.h

diff --git a/Makefile b/Makefile
index aafa94f2..995b387f 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ endif
 
 default: lz4c
 
-all: lz4c lz4cs lz4c32 fuzzer fullbench
+all: lz4c lz4cs lz4c32 fuzzer fuzzer32 fullbench fullbench32
 
 lz4c: lz4.c lz4hc.c bench.c xxhash.c lz4c.c
 	$(CC)      -O3 $(CFLAGS) $^ -o $@$(EXT)
@@ -24,8 +24,14 @@ lz4c32: lz4.c lz4hc.c bench.c xxhash.c lz4c.c
 fuzzer : lz4.c lz4hc.c fuzzer.c
 	$(CC)      -O3 $(CFLAGS) $^ -o $@$(EXT)
 	
+fuzzer32 : lz4.c lz4hc.c fuzzer.c
+	$(CC) -m32 -O3 $(CFLAGS) $^ -o $@$(EXT)
+
 fullbench : lz4.c lz4hc.c xxhash.c fullbench.c
 	$(CC)      -O3 $(CFLAGS) $^ -o $@$(EXT)
 
+fullbench32 : lz4.c lz4hc.c xxhash.c fullbench.c
+	$(CC) -m32 -O3 $(CFLAGS) $^ -o $@$(EXT)
+
 clean:
-	rm -f core *.o lz4c$(EXT) lz4cs$(EXT) lz4c32$(EXT) fuzzer$(EXT) fullbench$(EXT)
+	rm -f core *.o lz4c$(EXT) lz4cs$(EXT) lz4c32$(EXT) fuzzer$(EXT) fullbench$(EXT) fullbench32$(EXT)
diff --git a/bench.c b/bench.c
index 07fd091b..016a46e7 100644
--- a/bench.c
+++ b/bench.c
@@ -246,17 +246,7 @@ static U64 BMK_GetFileSize(char* infilename)
 int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
 {
   int fileIdx=0;
-  FILE* fileIn;
-  char* infilename;
-  U64 largefilesize;
-  size_t benchedSize;
-  int nbChunks;
-  int maxCChunkSize;
-  size_t readSize;
   char* orig_buff;
-  char* compressed_buff; int compressed_buff_size;
-  struct chunkParameters* chunkP;
-  U32 crcc, crcd=0;
   struct compressionParameters compP;
 
   U64 totals = 0;
@@ -281,40 +271,51 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
   // Loop for each file
   while (fileIdx<nbFiles)
   {
+      FILE*  inFile;
+      char*  inFileName;
+      U64    inFileSize;
+      size_t benchedSize;
+      int nbChunks;
+      int maxCompressedChunkSize;
+      size_t readSize;
+      char* compressedBuffer; int compressedBuffSize;
+      struct chunkParameters* chunkP;
+      U32 crcOrig;
+
       // Check file existence
-      infilename = fileNamesTable[fileIdx++];
-      fileIn = fopen( infilename, "rb" );
-      if (fileIn==NULL)
+      inFileName = fileNamesTable[fileIdx++];
+      inFile = fopen( inFileName, "rb" );
+      if (inFile==NULL)
       {
-        DISPLAY( "Pb opening %s\n", infilename);
+        DISPLAY( "Pb opening %s\n", inFileName);
         return 11;
       }
 
       // Memory allocation & restrictions
-      largefilesize = BMK_GetFileSize(infilename);
-      benchedSize = (size_t) BMK_findMaxMem(largefilesize) / 2;
-      if ((U64)benchedSize > largefilesize) benchedSize = (size_t)largefilesize;
-      if (benchedSize < largefilesize)
+      inFileSize = BMK_GetFileSize(inFileName);
+      benchedSize = (size_t) BMK_findMaxMem(inFileSize) / 2;
+      if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+      if (benchedSize < inFileSize)
       {
-          DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20));
+          DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
       }
 
       // Alloc
       chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters));
       orig_buff = (char*)malloc((size_t )benchedSize);
       nbChunks = (int) (benchedSize / chunkSize) + 1;
-      maxCChunkSize = LZ4_compressBound(chunkSize);
-      compressed_buff_size = nbChunks * maxCChunkSize;
-      compressed_buff = (char*)malloc((size_t )compressed_buff_size);
+      maxCompressedChunkSize = LZ4_compressBound(chunkSize);
+      compressedBuffSize = nbChunks * maxCompressedChunkSize;
+      compressedBuffer = (char*)malloc((size_t )compressedBuffSize);
 
 
-      if (!orig_buff || !compressed_buff)
+      if (!orig_buff || !compressedBuffer)
       {
         DISPLAY("\nError: not enough memory!\n");
         free(orig_buff);
-        free(compressed_buff);
+        free(compressedBuffer);
         free(chunkP);
-        fclose(fileIn);
+        fclose(inFile);
         return 12;
       }
 
@@ -323,51 +324,54 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
           int i;
           size_t remaining = benchedSize;
           char* in = orig_buff;
-          char* out = compressed_buff;
+          char* out = compressedBuffer;
           for (i=0; i<nbChunks; i++)
           {
               chunkP[i].id = i;
               chunkP[i].origBuffer = in; in += chunkSize;
               if ((int)remaining > chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; }
-              chunkP[i].compressedBuffer = out; out += maxCChunkSize;
+              chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize;
               chunkP[i].compressedSize = 0;
           }
       }
 
       // Fill input buffer
-      DISPLAY("Loading %s...       \r", infilename);
-      readSize = fread(orig_buff, 1, benchedSize, fileIn);
-      fclose(fileIn);
+      DISPLAY("Loading %s...       \r", inFileName);
+      readSize = fread(orig_buff, 1, benchedSize, inFile);
+      fclose(inFile);
 
       if (readSize != benchedSize)
       {
-        DISPLAY("\nError: problem reading file '%s' !!    \n", infilename);
+        DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
         free(orig_buff);
-        free(compressed_buff);
+        free(compressedBuffer);
         free(chunkP);
         return 13;
       }
 
       // Calculating input Checksum
-      crcc = XXH32(orig_buff, (unsigned int)benchedSize,0);
+      crcOrig = XXH32(orig_buff, (unsigned int)benchedSize,0);
 
 
       // Bench
       {
-        int loopNb, nb_loops, chunkNb;
+        int loopNb, chunkNb;
         size_t cSize=0;
-        int milliTime;
         double fastestC = 100000000., fastestD = 100000000.;
         double ratio=0.;
+        U32 crcCheck=0;
 
         DISPLAY("\r%79s\r", "");
         for (loopNb = 1; loopNb <= nbIterations; loopNb++)
         {
-          // Compression
-          DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedSize);
-          { size_t i; for (i=0; i<benchedSize; i++) compressed_buff[i]=(char)i; }     // warmimg up memory
+          int nbLoops;
+          int milliTime;
 
-          nb_loops = 0;
+          // Compression
+          DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, inFileName, (int)benchedSize);
+          { size_t i; for (i=0; i<benchedSize; i++) compressedBuffer[i]=(char)i; }     // warmimg up memory
+
+          nbLoops = 0;
           milliTime = BMK_GetMilliStart();
           while(BMK_GetMilliStart() == milliTime);
           milliTime = BMK_GetMilliStart();
@@ -375,20 +379,20 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
           {
             for (chunkNb=0; chunkNb<nbChunks; chunkNb++)
                 chunkP[chunkNb].compressedSize = compP.compressionFunction(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize);
-            nb_loops++;
+            nbLoops++;
           }
           milliTime = BMK_GetMilliSpan(milliTime);
 
-          if ((double)milliTime < fastestC*nb_loops) fastestC = (double)milliTime/nb_loops;
+          if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime/nbLoops;
           cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].compressedSize;
           ratio = (double)cSize/(double)benchedSize*100.;
 
-          DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000.);
+          DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000.);
 
           // Decompression
           { size_t i; for (i=0; i<benchedSize; i++) orig_buff[i]=0; }     // zeroing area, for CRC checking
 
-          nb_loops = 0;
+          nbLoops = 0;
           milliTime = BMK_GetMilliStart();
           while(BMK_GetMilliStart() == milliTime);
           milliTime = BMK_GetMilliStart();
@@ -402,24 +406,24 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
                 //chunkP[chunkNb].origSize = LZ4_decompress_safe_partial(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedSize, chunkSize-5, chunkSize);
                 //chunkP[chunkNb].compressedSize = LZ4_uncompress(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].origSize);
                 //chunkP[chunkNb].origSize = LZ4_uncompress_unknownOutputSize(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedSize, chunkSize);
-            nb_loops++;
+            nbLoops++;
           }
           milliTime = BMK_GetMilliSpan(milliTime);
 
-          if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops;
-          DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
+          if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime/nbLoops;
+          DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
 
           // CRC Checking
-          crcd = XXH32(orig_buff, (unsigned int)benchedSize,0);
-          if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }
+          crcCheck = XXH32(orig_buff, (unsigned int)benchedSize,0);
+          if (crcOrig!=crcCheck) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOrig, (unsigned)crcCheck); break; }
         }
 
-        if (crcc==crcd)
+        if (crcOrig==crcCheck)
         {
             if (ratio<100.)
-                DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
+                DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
             else
-                DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
+                DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
         }
         totals += benchedSize;
         totalz += cSize;
@@ -428,7 +432,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
       }
 
       free(orig_buff);
-      free(compressed_buff);
+      free(compressedBuffer);
       free(chunkP);
   }
 
diff --git a/fullbench.c b/fullbench.c
index 66aa0ed2..779e1bab 100644
--- a/fullbench.c
+++ b/fullbench.c
@@ -116,6 +116,9 @@
 #define MAX_MEM    (1984<<20)
 #define DEFAULT_CHUNKSIZE   (4<<20)
 
+#define ALL_COMPRESSORS -1
+#define ALL_DECOMPRESSORS -1
+
 
 //**************************************
 // Local structures
@@ -145,6 +148,8 @@ static int nbIterations = NBLOOPS;
 static int BMK_pause = 0;
 static int compressionTest = 1;
 static int decompressionTest = 1;
+static int compressionAlgo = ALL_COMPRESSORS;
+static int decompressionAlgo = ALL_DECOMPRESSORS;
 
 void BMK_SetBlocksize(int bsize)
 {
@@ -277,22 +282,16 @@ static inline int local_LZ4_decompress_safe_partial(const char* in, char* out, i
 int fullSpeedBench(char** fileNamesTable, int nbFiles)
 {
   int fileIdx=0;
-  FILE* fileIn;
-  char* infilename;
-  U64 largefilesize;
-  size_t benchedSize;
-  int nbChunks;
-  int maxCChunkSize;
-  size_t readSize;
   char* orig_buff;
-  char* compressed_buff; int compressed_buff_size;
-  struct chunkParameters* chunkP;
-  U32 crcc, crcd=0;
 # define NB_COMPRESSION_ALGORITHMS 4
+# define MINCOMPRESSIONCHAR '0'
+# define MAXCOMPRESSIONCHAR '3'
   static char* compressionNames[] = { "LZ4_compress", "LZ4_compress_limitedOutput", "LZ4_compressHC", "LZ4_compressHC_limitedOutput" };
   double totalCTime[NB_COMPRESSION_ALGORITHMS] = {0};
   double totalCSize[NB_COMPRESSION_ALGORITHMS] = {0};
 # define NB_DECOMPRESSION_ALGORITHMS 5
+# define MINDECOMPRESSIONCHAR '0'
+# define MAXDECOMPRESSIONCHAR '4'
   static char* decompressionNames[] = { "LZ4_decompress_fast", "LZ4_decompress_fast_withPrefix64k", "LZ4_decompress_safe", "LZ4_decompress_safe_withPrefix64k", "LZ4_decompress_safe_partial" };
   double totalDTime[NB_DECOMPRESSION_ALGORITHMS] = {0};
 
@@ -302,31 +301,42 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
   // Loop for each file
   while (fileIdx<nbFiles)
   {
+      FILE* inFile;
+      char* inFileName;
+      U64   inFileSize;
+      size_t benchedSize;
+      int nbChunks;
+      int maxCompressedChunkSize;
+      struct chunkParameters* chunkP;
+      size_t readSize;
+      char* compressed_buff; int compressedBuffSize;
+      U32 crcOriginal;
+
       // Check file existence
-      infilename = fileNamesTable[fileIdx++];
-      fileIn = fopen( infilename, "rb" );
-      if (fileIn==NULL)
+      inFileName = fileNamesTable[fileIdx++];
+      inFile = fopen( inFileName, "rb" );
+      if (inFile==NULL)
       {
-        DISPLAY( "Pb opening %s\n", infilename);
+        DISPLAY( "Pb opening %s\n", inFileName);
         return 11;
       }
 
       // Memory allocation & restrictions
-      largefilesize = BMK_GetFileSize(infilename);
-      benchedSize = (size_t) BMK_findMaxMem(largefilesize) / 2;
-      if ((U64)benchedSize > largefilesize) benchedSize = (size_t)largefilesize;
-      if (benchedSize < largefilesize)
+      inFileSize = BMK_GetFileSize(inFileName);
+      benchedSize = (size_t) BMK_findMaxMem(inFileSize) / 2;
+      if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+      if (benchedSize < inFileSize)
       {
-          DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20));
+          DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
       }
 
       // Alloc
       chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters));
       orig_buff = (char*) malloc((size_t)benchedSize);
       nbChunks = (int) (benchedSize / chunkSize) + 1;
-      maxCChunkSize = LZ4_compressBound(chunkSize);
-      compressed_buff_size = nbChunks * maxCChunkSize;
-      compressed_buff = (char*)malloc((size_t)compressed_buff_size);
+      maxCompressedChunkSize = LZ4_compressBound(chunkSize);
+      compressedBuffSize = nbChunks * maxCompressedChunkSize;
+      compressed_buff = (char*)malloc((size_t)compressedBuffSize);
 
 
       if(!orig_buff || !compressed_buff)
@@ -335,7 +345,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
         free(orig_buff);
         free(compressed_buff);
         free(chunkP);
-        fclose(fileIn);
+        fclose(inFile);
         return 12;
       }
 
@@ -350,19 +360,19 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
               chunkP[i].id = i;
               chunkP[i].origBuffer = in; in += chunkSize;
               if ((int)remaining > chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; }
-              chunkP[i].compressedBuffer = out; out += maxCChunkSize;
+              chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize;
               chunkP[i].compressedSize = 0;
           }
       }
 
       // Fill input buffer
-      DISPLAY("Loading %s...       \r", infilename);
-      readSize = fread(orig_buff, 1, benchedSize, fileIn);
-      fclose(fileIn);
+      DISPLAY("Loading %s...       \r", inFileName);
+      readSize = fread(orig_buff, 1, benchedSize, inFile);
+      fclose(inFile);
 
       if(readSize != benchedSize)
       {
-        DISPLAY("\nError: problem reading file '%s' !!    \n", infilename);
+        DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
         free(orig_buff);
         free(compressed_buff);
         free(chunkP);
@@ -370,7 +380,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
       }
 
       // Calculating input Checksum
-      crcc = XXH32(orig_buff, (unsigned int)benchedSize,0);
+      crcOriginal = XXH32(orig_buff, (unsigned int)benchedSize,0);
 
 
       // Bench
@@ -380,7 +390,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
         double ratio=0.;
 
         DISPLAY("\r%79s\r", "");
-        DISPLAY(" %s : \n", infilename);
+        DISPLAY(" %s : \n", inFileName);
 
         // Compression Algorithms
         for (cAlgNb=0; (cAlgNb < NB_COMPRESSION_ALGORITHMS) && (compressionTest); cAlgNb++)
@@ -389,6 +399,8 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
             int (*compressionFunction)(const char*, char*, int);
             double bestTime = 100000000.;
 
+            if ((compressionAlgo != ALL_COMPRESSORS) && (compressionAlgo != cAlgNb)) continue;
+
             switch(cAlgNb)
             {
             case 0: compressionFunction = LZ4_compress; break;
@@ -452,6 +464,8 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
             int (*decompressionFunction)(const char*, char*, int, int);
             double bestTime = 100000000.;
 
+            if ((decompressionAlgo != ALL_DECOMPRESSORS) && (decompressionAlgo != dAlgNb)) continue;
+
             switch(dAlgNb)
             {
             case 0: decompressionFunction = local_LZ4_decompress_fast; break;
@@ -466,8 +480,9 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
             {
                 double averageTime;
                 int milliTime;
+                U32 crcDecoded;
 
-                DISPLAY("%1i-%-19.19s : %9i ->\r", loopNb, dName, (int)benchedSize);
+                DISPLAY("%1i-%-24.24s :%10i ->\r", loopNb, dName, (int)benchedSize);
 
                 nb_loops = 0;
                 milliTime = BMK_GetMilliStart();
@@ -487,13 +502,14 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
                 averageTime = (double)milliTime / nb_loops;
                 if (averageTime < bestTime) bestTime = averageTime;
 
-                DISPLAY("%1i-%-19.19s : %9i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.);
+                DISPLAY("%1i-%-24.24s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.);
+
+                // CRC Checking
+                crcDecoded = XXH32(orig_buff, (int)benchedSize, 0);
+                if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); }
             }
 
-            // CRC Checking
-            crcd = XXH32(orig_buff, (int)benchedSize, 0);
-            if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); exit(1); }
-            DISPLAY("%-21.21s : %9i -> %7.1f MB/s\n", dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.);
+            DISPLAY("%-26.26s :%10i -> %7.1f MB/s\n", dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.);
 
             totalDTime[dAlgNb] += bestTime;
         }
@@ -514,11 +530,13 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
       for (AlgNb = 0; (AlgNb < NB_COMPRESSION_ALGORITHMS) && (compressionTest); AlgNb ++)
       {
           char* cName = compressionNames[AlgNb];
+          if ((compressionAlgo != ALL_COMPRESSORS) && (compressionAlgo != AlgNb)) continue;
           DISPLAY("%-21.21s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s\n", cName, (long long unsigned int)totals, (long long unsigned int)totalCSize[AlgNb], (double)totalCSize[AlgNb]/(double)totals*100., (double)totals/totalCTime[AlgNb]/1000.);
       }
       for (AlgNb = 0; (AlgNb < NB_DECOMPRESSION_ALGORITHMS) && (decompressionTest); AlgNb ++)
       {
           char* dName = decompressionNames[AlgNb];
+          if ((decompressionAlgo != ALL_DECOMPRESSORS) && (decompressionAlgo != AlgNb)) continue;
           DISPLAY("%-21.21s :%10llu -> %6.1f MB/s\n", dName, (long long unsigned int)totals, (double)totals/totalDTime[AlgNb]/1000.);
       }
   }
@@ -536,16 +554,18 @@ int usage(char* exename)
     DISPLAY( "Arguments :\n");
     DISPLAY( " -c     : compression tests only\n");
     DISPLAY( " -d     : decompression tests only\n");
-    DISPLAY( " -H     : Help (this text + advanced options)\n");
+    DISPLAY( " -H/-h  : Help (this text + advanced options)\n");
     return 0;
 }
 
 int usage_advanced()
 {
     DISPLAY( "\nAdvanced options :\n");
+    DISPLAY( " -c#    : test only compression function # [%c-%c]\n", MINCOMPRESSIONCHAR, MAXCOMPRESSIONCHAR);
+    DISPLAY( " -d#    : test only compression function # [%c-%c]\n", MINDECOMPRESSIONCHAR, MAXDECOMPRESSIONCHAR);
+    DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
     DISPLAY( " -B#    : Block size [4-7](default : 7)\n");
     //DISPLAY( " -BD    : Block dependency (improve compression ratio)\n");
-    DISPLAY( " -i#    : iteration loops [1-9](default : 6)\n");
     return 0;
 }
 
@@ -584,12 +604,21 @@ int main(int argc, char** argv)
                 switch(argument[0])
                 {
                     // Select compression algorithm only
-                case 'c': decompressionTest = 0; break;
+                case 'c': 
+                    decompressionTest = 0; 
+                    if ((argument[1]>= MINCOMPRESSIONCHAR) && (argument[1]<= MAXCOMPRESSIONCHAR))
+                       compressionAlgo = argument[1] - '0', argument++;
+                    break;
 
                     // Select decompression algorithm only
-                case 'd': compressionTest = 0; break;
+                case 'd': 
+                    compressionTest = 0;
+                    if ((argument[1]>= MINDECOMPRESSIONCHAR) && (argument[1]<= MAXDECOMPRESSIONCHAR))
+                       decompressionAlgo = argument[1] - '0', argument++;
+                    break;
 
                     // Display help on usage
+                case 'h' :
                 case 'H': usage(exename); usage_advanced(); return 0;
 
                     // Modify Block Properties
diff --git a/fuzzer.c b/fuzzer.c
index a10b222e..fdbc3c0d 100644
--- a/fuzzer.c
+++ b/fuzzer.c
@@ -101,7 +101,7 @@ int FUZ_SecurityTest()
   char* input;
   int i, r;
 
-  printf("Overflow test (issue 52)...");
+  printf("Overflow test (issue 52)...\n");
   input = (char*) malloc (20<<20);
   output = (char*) malloc (20<<20);
   input[0] = 0x0F;
@@ -132,7 +132,8 @@ int main() {
         unsigned int seed, randState, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart();
         int i, j, k, ret, len, lenHC, attemptNb;
         char userInput[30] = {0};
-#       define FUZ_CHECKTEST(cond, message) testNb++; if (cond) { printf("Test %i : %s : seed %u, cycle %i \n", testNb, message, seed, attemptNb); goto _output_error; }
+#       define FUZ_CHECKTEST(cond, message) if (cond) { printf("Test %i : %s : seed %u, cycle %i \n", testNb, message, seed, attemptNb); goto _output_error; }
+#       define FUZ_DISPLAYTEST              testNb++; printf("%2i\b\b", testNb);
 
         printf("starting LZ4 fuzzer\n");
         printf("Select an Initialisation number (default : random) : ");
@@ -145,7 +146,7 @@ int main() {
         printf("Seed = %u\n", seed);
         randState = seed;
 
-        FUZ_SecurityTest();
+        //FUZ_SecurityTest();
 
         for (i = 0; i < 2048; i++)
                 cbuf[FUZ_avail + i] = cbuf[FUZ_avail + 2048 + i] = FUZ_rand(&randState) >> 16;
@@ -154,7 +155,7 @@ int main() {
         {
             int testNb = 0;
 
-            printf("\r%7i /%7i\r", attemptNb, NB_ATTEMPTS);
+            printf("\r%7i /%7i   - ", attemptNb, NB_ATTEMPTS);
             
             for (j = 0; j < NUM_SEQ; j++) {
                     seeds[j] = FUZ_rand(&randState) << 8;
@@ -173,71 +174,87 @@ int main() {
             }
 
             // Test compression HC
+            FUZ_DISPLAYTEST;   // 1
             ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max);
             FUZ_CHECKTEST(ret==0, "HC compression failed despite sufficient space");
             lenHC = ret;
 
             // Test compression
+            FUZ_DISPLAYTEST;   // 2
             ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max);
             FUZ_CHECKTEST(ret==0, "compression failed despite sufficient space");
             len = ret;
 
             // Test decoding with output size being exactly what's necessary => must work
+            FUZ_DISPLAYTEST;   // 3
             ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN);
-            FUZ_CHECKTEST(ret<0, "decompression failed despite correct space");
+            FUZ_CHECKTEST(ret<0, "LZ4_decompress_fast failed despite correct space");
 
             // Test decoding with one byte missing => must fail
+            FUZ_DISPLAYTEST;   // 4
             ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN-1);
-            FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to Output Size being too small");
+            FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small");
 
             // Test decoding with one byte too much => must fail
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN+1);
-            FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to Output Size being too large");
+            FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large");
 
             // Test decoding with enough output size => must work
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN+1);
-            FUZ_CHECKTEST(ret<0, "decompression failed despite sufficient space");
+            FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space");
 
             // Test decoding with output size being exactly what's necessary => must work
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN);
-            FUZ_CHECKTEST(ret<0, "decompression failed despite sufficient space");
+            FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space");
 
             // Test decoding with output size being one byte too short => must fail
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN-1);
             FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short");
 
             // Test decoding with input size being one byte too short => must fail
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len-1, LEN);
             FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short");
 
             // Test decoding with input size being one byte too large => must fail
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len+1, LEN);
-            FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to input size being too large");
+            FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large");
             //if (ret>=0) { printf("Test 10 : decompression should have failed, due to input size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; }
 
             // Test partial decoding with target output size being max/2 => must work
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN/2, LEN);
-            FUZ_CHECKTEST(ret<0, "partial decompression failed despite sufficient space");
+            FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space");
 
             // Test partial decoding with target output size being just below max => must work
+            FUZ_DISPLAYTEST;
             ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN-3, LEN);
-            FUZ_CHECKTEST(ret<0, "partial decompression failed despite sufficient space");
+            FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space");
 
             // Test compression with output size being exactly what's necessary (should work)
+            FUZ_DISPLAYTEST;
             ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, len);
             FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer");
             FUZ_CHECKTEST(ret==0, "compression failed despite sufficient space");
 
             // Test HC compression with output size being exactly what's necessary (should work)
+            FUZ_DISPLAYTEST;
             ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, lenHC);
             FUZ_CHECKTEST(ret==0, "HC compression failed despite sufficient space");
 
             // Test compression with just one missing byte into output buffer => must fail
+            FUZ_DISPLAYTEST;
             ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, len-1);
             FUZ_CHECKTEST(ret, "compression overran output buffer");
             FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer");
 
             // Test HC compression with just one missing byte into output buffer => must fail
+            FUZ_DISPLAYTEST;
             ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, lenHC-1);
             FUZ_CHECKTEST(ret, "HC compression overran output buffer");
 
diff --git a/lz4.c b/lz4.c
index 779d68aa..549edad4 100644
--- a/lz4.c
+++ b/lz4.c
@@ -31,10 +31,6 @@
    - LZ4 source repository : http://code.google.com/p/lz4/
 */
 
-/*
-Note : this source file requires "lz4_encoder.h"
-*/
-
 //**************************************
 // Tuning parameters
 //**************************************
@@ -46,10 +42,8 @@ Note : this source file requires "lz4_encoder.h"
 #define MEMORY_USAGE 14
 
 // HEAPMODE :
-// Select how default compression function will allocate memory for its hash table,
+// Select how default compression functions will allocate memory for their hash table,
 // in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)).
-// Default allocation strategy is to use stack (HEAPMODE 0)
-// Note : explicit functions *_stack* and *_heap* are unaffected by this setting
 #define HEAPMODE 0
 
 
@@ -113,7 +107,7 @@ Note : this source file requires "lz4_encoder.h"
 #endif
 
 #ifdef _MSC_VER    // Visual Studio
-#  define forceinline static __forceinline
+#  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    // For Visual 2005
 #  if LZ4_ARCH64   // 64-bits
 #    pragma intrinsic(_BitScanForward64) // For Visual 2005
@@ -125,9 +119,9 @@ Note : this source file requires "lz4_encoder.h"
 #  pragma warning(disable : 4127)        // disable: C4127: conditional expression is constant
 #else 
 #  ifdef __GNUC__
-#    define forceinline static inline __attribute__((always_inline))
+#    define FORCE_INLINE static inline __attribute__((always_inline))
 #  else
-#    define forceinline static inline
+#    define FORCE_INLINE static inline
 #  endif
 #endif
 
@@ -175,6 +169,8 @@ Note : this source file requires "lz4_encoder.h"
   typedef unsigned long long  U64;
 #endif
 
+typedef const BYTE* Ptr;
+
 #if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
 #  define _PACKED __attribute__ ((packed))
 #else
@@ -232,15 +228,15 @@ typedef struct {size_t v;} _PACKED size_t_S;
 // Architecture-specific macros
 //**************************************
 #define STEPSIZE                  sizeof(size_t)
-#define LZ4_COPYSTEP(s,d)         { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; }
-#define LZ4_COPY8(s,d)            { LZ4_COPYSTEP(s,d); if (STEPSIZE<8) LZ4_COPYSTEP(s,d); }
-#define LZ4_SECURECOPY(s,d,e)     { if ((STEPSIZE==8)&&(d<e)) LZ4_WILDCOPY(s,d,e); }
+#define LZ4_COPYSTEP(d,s)         { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; }
+#define LZ4_COPY8(d,s)            { LZ4_COPYSTEP(d,s); if (STEPSIZE<8) LZ4_COPYSTEP(d,s); }
+#define LZ4_SECURECOPY(d,s,e)     { if ((STEPSIZE==4)||(d<e)) LZ4_WILDCOPY(d,s,e); }
 
 #if LZ4_ARCH64   // 64-bit
 #  define HTYPE                   U32
-#  define INITBASE(base)          const BYTE* const base = ip
+#  define INITBASE(base)          Ptr const base = ip
 #else            // 32-bit
-#  define HTYPE                   const BYTE*
+#  define HTYPE                   Ptr
 #  define INITBASE(base)          const int base = 0
 #endif
 
@@ -256,8 +252,7 @@ typedef struct {size_t v;} _PACKED size_t_S;
 //**************************************
 // Macros
 //**************************************
-#define LZ4_WILDCOPY(s,d,e)     { do { LZ4_COPY8(s,d) } while (d<e); }
-#define LZ4_BLINDCOPY(s,d,l)    { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
+#define LZ4_WILDCOPY(d,s,e)     { do { LZ4_COPY8(d,s) } while (d<e); }           // at the end, d>=e;
 
 
 //****************************
@@ -265,7 +260,7 @@ typedef struct {size_t v;} _PACKED size_t_S;
 //****************************
 #if LZ4_ARCH64
 
-forceinline int LZ4_NbCommonBytes (register U64 val)
+FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
 {
 # if defined(LZ4_BIG_ENDIAN)
 #   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -297,7 +292,7 @@ forceinline int LZ4_NbCommonBytes (register U64 val)
 
 #else
 
-forceinline int LZ4_NbCommonBytes (register U32 val)
+FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
 {
 # if defined(LZ4_BIG_ENDIAN)
 #   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -329,201 +324,234 @@ forceinline int LZ4_NbCommonBytes (register U32 val)
 #endif
 
 
-
-//******************************
+//****************************
 // Compression functions
-//******************************
+//****************************
+#define LZ4_HASHLOG (MEMORY_USAGE-2)
+typedef enum { notLimited = 0, limited = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-/*
-int LZ4_compress_stack(
-                 const char* source,
-                 char* dest,
-                 int inputSize)
+FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType)
+{
+    if (tableType == byU16)
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+    else
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
-Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
-return : the number of bytes written in buffer 'dest'
-*/
-#define FUNCTION_NAME LZ4_compress_stack
-#include "lz4_encoder.h"
+FORCE_INLINE int LZ4_hashPosition(Ptr p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); }
+
+FORCE_INLINE void LZ4_putPositionOnHash(Ptr p, U32 h, void* tableBase, tableType_t tableType, Ptr srcBase)
+{
+    switch (tableType)
+    {
+    case byPtr: { Ptr* hashTable = (Ptr*) tableBase; hashTable[h] = p; break; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = p-srcBase; break; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; }
+    }
+}
+
+FORCE_INLINE void LZ4_putPosition(Ptr p, void* tableBase, tableType_t tableType, Ptr srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+FORCE_INLINE Ptr LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, Ptr srcBase)
+{
+    if (tableType == byPtr) { Ptr* hashTable = (Ptr*) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
+    { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   // default, to ensure a return
+}
+
+FORCE_INLINE Ptr LZ4_getPosition(Ptr p, void* tableBase, tableType_t tableType, Ptr srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
 
 
-/*
-int LZ4_compress_stack_limitedOutput(
-                 const char* source,
-                 char* dest,
-                 int inputSize,
-                 int maxOutputSize)
-
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-If it cannot achieve it, compression will stop, and result of the function will be zero.
-return : the number of bytes written in buffer 'dest', or 0 if the compression fails
-*/
-#define FUNCTION_NAME LZ4_compress_stack_limitedOutput
-#define LIMITED_OUTPUT
-#include "lz4_encoder.h"
-
-
-/*
-int LZ4_compress64k_stack(
-                 const char* source,
-                 char* dest,
-                 int inputSize)
-
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
-This function compresses better than LZ4_compress_stack(), on the condition that
-'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
-Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
-return : the number of bytes written in buffer 'dest', or 0 if compression fails
-*/
-#define FUNCTION_NAME LZ4_compress64k_stack
-#define COMPRESS_64K
-#include "lz4_encoder.h"
-
-
-/*
-int LZ4_compress64k_stack_limitedOutput(
-                 const char* source,
-                 char* dest,
-                 int inputSize,
-                 int maxOutputSize)
-
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-This function compresses better than LZ4_compress_stack_limitedOutput(), on the condition that
-'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
-If it cannot achieve it, compression will stop, and result of the function will be zero.
-return : the number of bytes written in buffer 'dest', or 0 if the compression fails
-*/
-#define FUNCTION_NAME LZ4_compress64k_stack_limitedOutput
-#define COMPRESS_64K
-#define LIMITED_OUTPUT
-#include "lz4_encoder.h"
-
-
-/*
-void* LZ4_createHeapMemory();
-int LZ4_freeHeapMemory(void* ctx);
-
-Used to allocate and free hashTable memory 
-to be used by the LZ4_compress_heap* family of functions.
-LZ4_createHeapMemory() returns NULL is memory allocation fails.
-*/
-void* LZ4_create() { return malloc(HASHTABLESIZE); }
-int   LZ4_free(void* ctx) { free(ctx); return 0; }
-
-
-/*
-int LZ4_compress_heap(
-                 void* ctx,
-                 const char* source,
-                 char* dest,
-                 int inputSize)
-
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
-The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
-Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
-return : the number of bytes written in buffer 'dest'
-*/
-#define FUNCTION_NAME LZ4_compress_heap
-#define USE_HEAPMEMORY
-#include "lz4_encoder.h"
-
-
-/*
-int LZ4_compress_heap_limitedOutput(
+FORCE_INLINE int LZ4_compress_generic(
                  void* ctx,
                  const char* source,
                  char* dest,
                  int inputSize,
-                 int maxOutputSize)
+                 int maxOutputSize,
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-If it cannot achieve it, compression will stop, and result of the function will be zero.
-The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
-return : the number of bytes written in buffer 'dest', or 0 if the compression fails
-*/
-#define FUNCTION_NAME LZ4_compress_heap_limitedOutput
-#define LIMITED_OUTPUT
-#define USE_HEAPMEMORY
-#include "lz4_encoder.h"
+                 limitedOutput_directive limitedOutput,
+                 tableType_t tableType)
+{
+    Ptr ip = (Ptr) source;
+    Ptr const base = (Ptr) source;
+    Ptr anchor = (Ptr) source;
+    Ptr const iend = ip + inputSize;
+    Ptr const mflimit = iend - MFLIMIT;
+    Ptr const matchlimit = iend - LASTLITERALS;
 
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
 
-/*
-int LZ4_compress64k_heap(
-                 void* ctx,
-                 const char* source,
-                 char* dest,
-                 int inputSize)
+    int length;
+    const int skipStrength = SKIPSTRENGTH;
+    U32 forwardH;
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
-The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
-'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
-Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
-return : the number of bytes written in buffer 'dest'
-*/
-#define FUNCTION_NAME LZ4_compress64k_heap
-#define COMPRESS_64K
-#define USE_HEAPMEMORY
-#include "lz4_encoder.h"
+    // Init conditions
+    if (inputSize<MINLENGTH) goto _last_literals;
+    if ((tableType == byU16) && (inputSize>=LZ4_64KLIMIT)) return 0;   // Size too large (not within 64K limit)
 
+    // First Byte
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
-/*
-int LZ4_compress64k_heap_limitedOutput(
-                 void* ctx,
-                 const char* source,
-                 char* dest,
-                 int inputSize,
-                 int maxOutputSize)
+    // Main Loop
+    for ( ; ; )
+    {
+        int findMatchAttempts = (1U << skipStrength) + 3;
+        Ptr forwardIp = ip;
+        Ptr ref;
+        BYTE* token;
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-If it cannot achieve it, compression will stop, and result of the function will be zero.
-The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
-'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
-return : the number of bytes written in buffer 'dest', or 0 if the compression fails
-*/
-#define FUNCTION_NAME LZ4_compress64k_heap_limitedOutput
-#define COMPRESS_64K
-#define LIMITED_OUTPUT
-#define USE_HEAPMEMORY
-#include "lz4_encoder.h"
+        // Find a match
+        do {
+            U32 h = forwardH;
+            int step = findMatchAttempts++ >> skipStrength;
+            ip = forwardIp;
+            forwardIp = ip + step;
+
+            if unlikely(forwardIp > mflimit) { goto _last_literals; }
+
+            forwardH = LZ4_hashPosition(forwardIp, tableType);
+            ref = LZ4_getPositionOnHash(h, ctx, tableType, base);
+            LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+        } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip)));
+
+        // Catch up
+        while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
+
+        // Encode Literal length
+        length = (int)(ip - anchor);
+        token = op++;
+        if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend)) return 0;   // Check output limit
+        if (length>=(int)RUN_MASK) 
+        { 
+            int len = length-RUN_MASK; 
+            *token=(RUN_MASK<<ML_BITS); 
+            for(; len >= 255 ; len-=255) *op++ = 255; 
+            *op++ = (BYTE)len; 
+        }
+        else *token = (BYTE)(length<<ML_BITS);
+
+        // Copy Literals
+        { BYTE* end=(op)+(length); LZ4_WILDCOPY(op,anchor,end); op=end; }
+
+_next_match:
+        // Encode Offset
+        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
+
+        // Start Counting
+        ip+=MINMATCH; ref+=MINMATCH;    // MinMatch already verified
+        anchor = ip;
+        while likely(ip<matchlimit-(STEPSIZE-1))
+        {
+            size_t diff = AARCH(ref) ^ AARCH(ip);
+            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
+            ip += LZ4_NbCommonBytes(diff);
+            goto _endCount;
+        }
+        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
+        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
+        if ((ip<matchlimit) && (*ref == *ip)) ip++;
+_endCount:
+
+        // Encode MatchLength
+        length = (int)(ip - anchor);
+        if ((limitedOutput) && unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend)) return 0;    // Check output limit
+        if (length>=(int)ML_MASK) 
+        { 
+            *token += ML_MASK; 
+            length -= ML_MASK; 
+            for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } 
+            if (length >= 255) { length-=255; *op++ = 255; } 
+            *op++ = (BYTE)length; 
+        }
+        else *token += (BYTE)(length);
+
+        // Test end of chunk
+        if (ip > mflimit) { anchor = ip;  break; }
+
+        // Fill table
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        // Test next position
+        ref = LZ4_getPosition(ip, ctx, tableType, base); 
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
+
+        // Prepare next loop
+        anchor = ip++;
+        forwardH = LZ4_hashPosition(ip, tableType);
+    }
+
+_last_literals:
+    // Encode Last Literals
+    {
+        int lastRun = (int)(iend - anchor);
+        if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  // Check output limit
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    // End
+    return (int) (((char*)op)-dest);
+}
 
 
 int LZ4_compress(const char* source, char* dest, int inputSize)
 {
-#if HEAPMODE
-    void* ctx = LZ4_create();
-    int result;
-    if (ctx == NULL) return 0;    // Failed allocation => compression not done
-    if (inputSize < LZ4_64KLIMIT)
-        result = LZ4_compress64k_heap(ctx, source, dest, inputSize);
-    else result = LZ4_compress_heap(ctx, source, dest, inputSize);
-    LZ4_free(ctx);
-    return result;
+#if (HEAPMODE)
+    void* ctx = calloc(1U<<(MEMORY_USAGE-2), 4);   // Aligned on 4-bytes boundaries
 #else
-    if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack(source, dest, inputSize);
-    return LZ4_compress_stack(source, dest, inputSize);
+    U32 ctx[1U<<(MEMORY_USAGE-2)] = {0};           // Ensure data is aligned on 4-bytes boundaries
 #endif
+    int result;
+
+    if (inputSize < (int)LZ4_64KLIMIT)
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16);
+    else
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr);
+
+#if (HEAPMODE)
+    free(ctx);
+#endif
+    return result;
 }
 
 
 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
 {
-#if HEAPMODE
-    void* ctx = LZ4_create();
-    int result;
-    if (ctx == NULL) return 0;    // Failed allocation => compression not done
-    if (inputSize < LZ4_64KLIMIT)
-        result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize);
-    else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize);
-    LZ4_free(ctx);
-    return result;
+#if (HEAPMODE)
+    void* ctx = calloc(1U<<(MEMORY_USAGE-2), 4);   // Aligned on 4-bytes boundaries
 #else
-    if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack_limitedOutput(source, dest, inputSize, maxOutputSize);
-    return LZ4_compress_stack_limitedOutput(source, dest, inputSize, maxOutputSize);
+    U32 ctx[1U<<(MEMORY_USAGE-2)] = {0};           // Ensure data is aligned on 4-bytes boundaries
 #endif
+    int result;
+
+    if (inputSize < (int)LZ4_64KLIMIT)
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, byU16);
+    else
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, (sizeof(void*)==8) ? byU32 : byPtr);
+
+#if (HEAPMODE)
+    free(ctx);
+#endif
+    return result;
 }
 
 
+
 //****************************
 // Decompression functions
 //****************************
@@ -537,7 +565,7 @@ typedef enum { full = 0, partial = 1 } earlyEnd_directive;
 // It shall be instanciated several times, using different sets of directives
 // Note that it is essential this generic function is really inlined, 
 // in order to remove useless branches during compilation optimisation.
-forceinline int LZ4_decompress_generic(
+FORCE_INLINE int LZ4_decompress_generic(
                  const char* source,
                  char* dest,
                  int inputSize,          //
@@ -550,9 +578,9 @@ forceinline int LZ4_decompress_generic(
                  )
 {
     // Local Variables
-    const BYTE* restrict ip = (const BYTE*) source;
-    const BYTE* ref;
-    const BYTE* const iend = ip + inputSize;
+    Ptr restrict ip = (Ptr) source;
+    Ptr ref;
+    Ptr const iend = ip + inputSize;
 
     BYTE* op = (BYTE*) dest;
     BYTE* const oend = op + outputSize;
@@ -609,7 +637,7 @@ forceinline int LZ4_decompress_generic(
             op += length;
             break;                                       // Necessarily EOF, due to parsing restrictions
         }
-        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
+        LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy;
 
         // get offset
         LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
@@ -618,7 +646,7 @@ forceinline int LZ4_decompress_generic(
         // get matchlength
         if ((length=(token&ML_MASK)) == ML_MASK) 
         { 
-            for ( ; (!endOnInput) || (ip<iend-(LASTLITERALS+1)) ; )   // Ensure enough bytes remain for LASTLITERALS + token
+            while ((!endOnInput) || (ip<iend-(LASTLITERALS+1)))   // Ensure enough bytes remain for LASTLITERALS + token
             {
                 unsigned s = *ip++; 
                 length += s; 
@@ -642,18 +670,18 @@ forceinline int LZ4_decompress_generic(
             op += 4, ref += 4; ref -= dec32table[op-ref];
             A32(op) = A32(ref); 
             op += STEPSIZE-4; ref -= dec64;
-        } else { LZ4_COPYSTEP(ref,op); }
+        } else { LZ4_COPYSTEP(op,ref); }
         cpy = op + length - (STEPSIZE-4);
 
-        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
+        if unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4))
         {
             if (cpy > oend-LASTLITERALS) goto _output_error;    // Error : last 5 bytes must be literals
-            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
+            LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH));
             while(op<cpy) *op++=*ref++;
             op=cpy;
             continue;
         }
-        LZ4_WILDCOPY(ref, op, cpy);
+        LZ4_WILDCOPY(op, ref, cpy);
         op=cpy;   // correction
     }
 
@@ -674,23 +702,27 @@ int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOu
     return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, full, 0);
 }
 
-int LZ4_decompress_fast(const char* source, char* dest, int outputSize)
-{
-    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, noPrefix, full, 0);
-}
-
 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int inputSize, int maxOutputSize)
 {
     return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, withPrefix, full, 0);
 }
 
-int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int outputSize)
-{
-    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
-}
-
 int LZ4_decompress_safe_partial(const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize)
 {
     return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, partial, targetOutputSize);
 }
 
+int LZ4_decompress_fast(const char* source, char* dest, int outputSize)
+{
+#ifdef _MSC_VER   // This version is faster with Visual
+    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, noPrefix, full, 0);
+#else
+    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
+#endif
+}
+
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int outputSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
+}
+
diff --git a/lz4.h b/lz4.h
index 693ac650..ab73fdc8 100644
--- a/lz4.h
+++ b/lz4.h
@@ -114,7 +114,7 @@ LZ4_decompress_fast() :
     return : the number of bytes read from the source buffer (in other words, the compressed size)
              If the source stream is malformed, the function will stop decoding and return a negative result.
     note : This function is a bit faster than LZ4_decompress_safe()
-           This function never writes outside of output buffers, and never read before input buffer, but may read beyond input buffer (since it doesn't know its size) in case of malicious data packet.
+           This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet.
            Use this function preferably into a trusted environment (data to decode comes from a trusted source).
            Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes.
 */
diff --git a/lz4_encoder.h b/lz4_encoder.h
deleted file mode 100644
index f1731486..00000000
--- a/lz4_encoder.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
-   LZ4 Encoder - Part of LZ4 compression algorithm
-   Copyright (C) 2011-2013, Yann Collet.
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
-*/
-
-/* lz4_encoder.h must be included into lz4.c
-   The objective of this file is to create a single LZ4 compression function source
-   which will be instanciated multiple times with minor variations
-   depending on a set of #define.
-*/
-
-
-
-//****************************
-// Check required defines
-//****************************
-
-#ifndef FUNCTION_NAME
-#  error "FUNTION_NAME is not defined"
-#endif
-
-
-//****************************
-// Local definitions
-//****************************
-
-#ifdef COMPRESS_64K
-#  define HASHLOG (MEMORY_USAGE-1)
-#  define CURRENT_H_TYPE U16
-#  define CURRENTBASE(base) const BYTE* const base = ip
-#else
-#  define HASHLOG (MEMORY_USAGE-2)
-#  define CURRENT_H_TYPE HTYPE
-#  define CURRENTBASE(base) INITBASE(base)
-#endif
-
-#define HASHTABLE_NBCELLS  (1U<<HASHLOG)
-#define LZ4_HASH(i)        (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG))
-#define LZ4_HASHVALUE(p)   LZ4_HASH(A32(p))
-
-
-
-//****************************
-// Function code
-//****************************
-
-int FUNCTION_NAME(
-#ifdef USE_HEAPMEMORY
-                 void* ctx,
-#endif
-                 const char* source,
-                 char* dest,
-                 int inputSize
-#ifdef LIMITED_OUTPUT
-                ,int maxOutputSize
-#endif
-                 )
-{
-#ifdef USE_HEAPMEMORY
-    CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx;
-#else
-    CURRENT_H_TYPE HashTable[HASHTABLE_NBCELLS] = {0};
-#endif
-
-    const BYTE* ip = (BYTE*) source;
-    CURRENTBASE(base);
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-#define matchlimit (iend - LASTLITERALS)
-
-    BYTE* op = (BYTE*) dest;
-#ifdef LIMITED_OUTPUT
-    BYTE* const oend = op + maxOutputSize;
-#endif
-
-    int length;
-    const int skipStrength = SKIPSTRENGTH;
-    U32 forwardH;
-
-
-    // Init
-    if (inputSize<MINLENGTH) goto _last_literals;
-#ifdef COMPRESS_64K
-    if (inputSize>=LZ4_64KLIMIT) return 0;   // Size too large (not within 64K limit)
-#endif
-#ifdef USE_HEAPMEMORY
-    memset((void*)HashTable, 0, HASHTABLESIZE);
-#endif
-
-    // First Byte
-    HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
-    ip++; forwardH = LZ4_HASHVALUE(ip);
-
-    // Main Loop
-    for ( ; ; )
-    {
-        int findMatchAttempts = (1U << skipStrength) + 3;
-        const BYTE* forwardIp = ip;
-        const BYTE* ref;
-        BYTE* token;
-
-        // Find a match
-        do {
-            U32 h = forwardH;
-            int step = findMatchAttempts++ >> skipStrength;
-            ip = forwardIp;
-            forwardIp = ip + step;
-
-            if unlikely(forwardIp > mflimit) { goto _last_literals; }
-
-            forwardH = LZ4_HASHVALUE(forwardIp);
-            ref = base + HashTable[h];
-            HashTable[h] = (CURRENT_H_TYPE)(ip - base);
-
-        } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
-
-        // Catch up
-        while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
-
-        // Encode Literal length
-        length = (int)(ip - anchor);
-        token = op++;
-#ifdef LIMITED_OUTPUT
-        if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0;   // Check output limit
-#endif
-        if (length>=(int)RUN_MASK) 
-        { 
-            int len = length-RUN_MASK; 
-            *token=(RUN_MASK<<ML_BITS); 
-            for(; len >= 255 ; len-=255) *op++ = 255; 
-            *op++ = (BYTE)len; 
-        }
-        else *token = (BYTE)(length<<ML_BITS);
-
-        // Copy Literals
-        LZ4_BLINDCOPY(anchor, op, length);
-
-_next_match:
-        // Encode Offset
-        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
-
-        // Start Counting
-        ip+=MINMATCH; ref+=MINMATCH;    // MinMatch already verified
-        anchor = ip;
-        while likely(ip<matchlimit-(STEPSIZE-1))
-        {
-            size_t diff = AARCH(ref) ^ AARCH(ip);
-            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
-            ip += LZ4_NbCommonBytes(diff);
-            goto _endCount;
-        }
-        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
-        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
-        if ((ip<matchlimit) && (*ref == *ip)) ip++;
-_endCount:
-
-        // Encode MatchLength
-        length = (int)(ip - anchor);
-#ifdef LIMITED_OUTPUT
-        if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0;    // Check output limit
-#endif
-        if (length>=(int)ML_MASK) 
-        { 
-            *token += ML_MASK; 
-            length -= ML_MASK; 
-            for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } 
-            if (length >= 255) { length-=255; *op++ = 255; } 
-            *op++ = (BYTE)length; 
-        }
-        else *token += (BYTE)(length);
-
-        // Test end of chunk
-        if (ip > mflimit) { anchor = ip;  break; }
-
-        // Fill table
-        HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base);
-
-        // Test next position
-        ref = base + HashTable[LZ4_HASHVALUE(ip)];
-        HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
-        if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
-
-        // Prepare next loop
-        anchor = ip++;
-        forwardH = LZ4_HASHVALUE(ip);
-    }
-
-_last_literals:
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-#ifdef LIMITED_OUTPUT
-        if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;  // Check output limit
-#endif
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    }
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-
-//****************************
-// Clean defines
-//****************************
-
-// Required defines
-#undef FUNCTION_NAME
-
-// Locally Generated
-#undef HASHLOG
-#undef HASHTABLE_NBCELLS
-#undef LZ4_HASH
-#undef LZ4_HASHVALUE
-#undef CURRENT_H_TYPE
-#undef CURRENTBASE
-
-// Optional defines
-#ifdef LIMITED_OUTPUT
-#undef LIMITED_OUTPUT
-#endif
-
-#ifdef USE_HEAPMEMORY
-#undef USE_HEAPMEMORY
-#endif
-
-#ifdef COMPRESS_64K
-#undef COMPRESS_64K
-#endif
diff --git a/lz4c.c b/lz4c.c
index 4d90bef2..b6fc0ee1 100644
--- a/lz4c.c
+++ b/lz4c.c
@@ -269,10 +269,10 @@ int get_fileHandle(char* input_filename, char* output_filename, FILE** pfinput,
         if (output_filename != nulmark) *pfoutput = fopen( output_filename, "rb" );
         if (*pfoutput!=0) 
         { 
-            char ch;
             fclose(*pfoutput); 
             if (!overwrite)
             {
+                char ch;
                 DISPLAY( "Warning : %s already exists\n", output_filename); 
                 DISPLAY( "Overwrite ? (Y/N) : ");
                 ch = (char)getchar();
@@ -447,12 +447,11 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i
         // Write Block
         if (outSize > 0)
         {
-            unsigned int checksum;
             int sizeToWrite;
             * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize);
             if (blockChecksum)
             {
-                checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED);
+                unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED);
                 * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum);
             }
             sizeToWrite = 4 + outSize + (4*blockChecksum);
@@ -462,7 +461,6 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i
         }
         else   // Copy Original
         {
-            unsigned int checksum;
             * (unsigned int*) out_buff = LITTLE_ENDIAN_32(inSize|0x80000000);   // Add Uncompressed flag
             sizeCheck = fwrite(out_buff, 1, 4, foutput);
             if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header");
@@ -470,7 +468,7 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i
             if (sizeCheck!=(size_t)(inSize)) EXM_THROW(35, "Write error : cannot write block");
             if (blockChecksum)
             {
-                checksum = XXH32(in_start, inSize, LZ4S_CHECKSUM_SEED);
+                unsigned int checksum = XXH32(in_start, inSize, LZ4S_CHECKSUM_SEED);
                 * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum);
                 sizeCheck = fwrite(out_buff, 1, 4, foutput);
                 if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum");
@@ -589,12 +587,11 @@ int compress_file(char* input_filename, char* output_filename, int compressionle
         // Write Block
         if (outSize > 0)
         {
-            unsigned int checksum;
             int sizeToWrite;
             * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize);
             if (blockChecksum)
             {
-                checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED);
+                unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED);
                 * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum);
             }
             sizeToWrite = 4 + outSize + (4*blockChecksum);
@@ -603,7 +600,6 @@ int compress_file(char* input_filename, char* output_filename, int compressionle
         }
         else  // Copy Original Uncompressed
         {
-            unsigned int checksum;
             * (unsigned int*) out_buff = LITTLE_ENDIAN_32(((unsigned long)readSize)|0x80000000);   // Add Uncompressed flag
             sizeCheck = fwrite(out_buff, 1, 4, foutput);
             if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header");
@@ -611,7 +607,7 @@ int compress_file(char* input_filename, char* output_filename, int compressionle
             if (sizeCheck!=readSize) EXM_THROW(35, "Write error : cannot write block");
             if (blockChecksum)
             {
-                checksum = XXH32(in_buff, (int)readSize, LZ4S_CHECKSUM_SEED);
+                unsigned int checksum = XXH32(in_buff, (int)readSize, LZ4S_CHECKSUM_SEED);
                 * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum);
                 sizeCheck = fwrite(out_buff, 1, 4, foutput);
                 if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum");
@@ -661,10 +657,7 @@ unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput)
     unsigned long long filesize = 0;
     char* in_buff;
     char* out_buff;
-    size_t uselessRet;
-    int sinkint;
     unsigned int blockSize;
-    size_t sizeCheck;
 
 
     // Allocate Memory
@@ -675,6 +668,10 @@ unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput)
     // Main Loop
     while (1)
     {
+        size_t uselessRet;
+        int sinkint;
+        size_t sizeCheck;
+
         // Block Size
         uselessRet = fread(&blockSize, 1, 4, finput);
         if( uselessRet==0 ) break;                 // Nothing to read : file read is completed
diff --git a/lz4hc.c b/lz4hc.c
index 8f2b4229..79fd3f75 100644
--- a/lz4hc.c
+++ b/lz4hc.c
@@ -31,11 +31,6 @@
    - LZ4 source repository : http://code.google.com/p/lz4/
 */
 
-/*
-Note : this source file requires "lz4hc_encoder.h"
-*/
-
-
 //**************************************
 // Memory routines
 //**************************************
@@ -100,7 +95,7 @@ Note : this source file requires "lz4hc_encoder.h"
 #endif
 
 #ifdef _MSC_VER    // Visual Studio
-#  define forceinline static __forceinline
+#  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    // For Visual 2005
 #  if LZ4_ARCH64   // 64-bits
 #    pragma intrinsic(_BitScanForward64) // For Visual 2005
@@ -113,9 +108,9 @@ Note : this source file requires "lz4hc_encoder.h"
 #  pragma warning(disable : 4701)        // disable: C4701: potentially uninitialized local variable used
 #else 
 #  ifdef __GNUC__
-#    define forceinline static inline __attribute__((always_inline))
+#    define FORCE_INLINE static inline __attribute__((always_inline))
 #  else
-#    define forceinline static inline
+#    define FORCE_INLINE static inline
 #  endif
 #endif
 
@@ -273,7 +268,7 @@ typedef struct
 //**************************************
 #if LZ4_ARCH64
 
-forceinline int LZ4_NbCommonBytes (register U64 val)
+FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -305,7 +300,7 @@ forceinline int LZ4_NbCommonBytes (register U64 val)
 
 #else
 
-forceinline int LZ4_NbCommonBytes (register U32 val)
+FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -337,7 +332,7 @@ forceinline int LZ4_NbCommonBytes (register U32 val)
 #endif
 
 
-forceinline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
+FORCE_INLINE int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
 {
     MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
@@ -365,7 +360,7 @@ int LZ4_freeHC (void* LZ4HC_Data)
 
 
 // Update chains up to ip (excluded)
-forceinline void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
 {
     U16*   chainTable = hc4->chainTable;
     HTYPE* HashTable  = hc4->hashTable;
@@ -403,7 +398,7 @@ char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
 }
 
 
-forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
+FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
 {
     const BYTE* p1t = p1;
 
@@ -421,7 +416,7 @@ forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYT
 }
 
 
-forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
 {
     U16* const chainTable = hc4->chainTable;
     HTYPE* const HashTable = hc4->hashTable;
@@ -489,7 +484,7 @@ forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const B
 }
 
 
-forceinline int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
 {
     U16* const  chainTable = hc4->chainTable;
     HTYPE* const HashTable = hc4->hashTable;
@@ -548,37 +543,276 @@ _endCount:
 }
 
 
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
 
-//**************************************
-// Compression functions
-//**************************************
+FORCE_INLINE int LZ4HC_encodeSequence (
+                       const BYTE** ip,
+                       BYTE** op,
+                       const BYTE** anchor,
+                       int matchLength,
+                       const BYTE* ref,
+                       limitedOutput_directive limitedOutputBuffer,
+                       BYTE* oend)
+{
+    int length;
+    BYTE* token;
 
-/*
-int LZ4_compressHC(
-                 const char* source,
-                 char* dest,
-                 int inputSize)
+    // Encode Literal length
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+    if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1;   // Check output limit
+    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; } 
+    else *token = (BYTE)(length<<ML_BITS);
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
-Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
-return : the number of bytes written in buffer 'dest'
-*/
-#define FUNCTION_NAME LZ4_compressHC
-#include "lz4hc_encoder.h"
+    // Copy Literals
+    LZ4_BLINDCOPY(*anchor, *op, length);
+
+    // Encode Offset
+    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
+
+    // Encode MatchLength
+    length = (int)(matchLength-MINMATCH);
+    if ((limitedOutputBuffer) && (*op + (1 + LASTLITERALS) + (length>>8) > oend)) return 1;   // Check output limit
+    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } 
+    else *token += (BYTE)(length);	
+
+    // Prepare next loop
+    *ip += matchLength;
+    *anchor = *ip; 
+
+    return 0;
+}
 
 
-/*
-int LZ4_compressHC_limitedOutput(
-                 const char* source,
+static int LZ4HC_compress_generic (
+                 void* ctxvoid,
+                 const char* source, 
                  char* dest,
                  int inputSize,
-                 int maxOutputSize)
+                 int maxOutputSize,
+                 limitedOutput_directive limit
+                )
+{
+    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-If it cannot achieve it, compression will stop, and result of the function will be zero.
-return : the number of bytes written in buffer 'dest', or 0 if the compression fails
-*/
-#define FUNCTION_NAME LZ4_compressHC_limitedOutput
-#define LIMITED_OUTPUT
-#include "lz4hc_encoder.h"
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref=NULL;
+    const BYTE* start2=NULL;
+    const BYTE* ref2=NULL;
+    const BYTE* start3=NULL;
+    const BYTE* ref3=NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+
+    // Ensure blocks follow each other
+    if (ip != ctx->end) return 0;
+    ctx->end += inputSize;
+
+    ip++;
+
+    // Main Loop
+    while (ip < mflimit)
+    {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
+        if (!ml) { ip++; continue; }
+
+        // saved, in case we would skip too much
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
+        else ml2 = ml;
+
+        if (ml2 == ml)  // No better match
+        {
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start0 < ip)
+        {
+            if (start2 < ip + ml0)   // empirical
+            {
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        // Here, start0==ip
+        if ((start2 - ip) < 3)   // First Match too small : removed
+        {
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        // Currently we have :
+        // ml2 > ml1, and
+        // ip1+3 <= ip2 (usually < ip1+ml1)
+        if ((start2 - ip) < OPTIMAL_ML)
+        {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0)
+            {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) // No better match : 2 sequences to encode
+        {
+            // ip & ref are known; Now for ml
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            // Now, encode 2 sequences
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            ip = start2;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
+        {
+            if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
+            {
+                if (start2 < ip+ml)
+                {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH)
+                    {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        // OK, now we have 3 ascending matches; let's write at least the first one
+        // ip & ref are known; Now for ml
+        if (start2 < ip+ml)
+        {
+            if ((start2 - ip) < (int)ML_MASK)
+            {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0)
+                {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            }
+            else
+            {
+                ml = (int)(start2 - ip);
+            }
+        }
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+
+    }
+
+    // Encode Last Literals
+    {
+        int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  // Check output limit
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    } 
+
+    // End
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_compressHC(const char* source, char* dest, int inputSize)
+{
+    void* ctx = LZ4_createHC(source);
+    int result;
+    if (ctx==NULL) return 0;
+
+    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, 0, noLimit);
+
+    LZ4_freeHC(ctx);
+    return result;
+}
+
+int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, noLimit);
+}
+
+
+int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    void* ctx = LZ4_createHC(source);
+    int result;
+    if (ctx==NULL) return 0;
+
+    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, maxOutputSize, limitedOutput);
+
+    LZ4_freeHC(ctx);
+    return result;
+}
+
+int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, limitedOutput);
+}
 
diff --git a/lz4hc_encoder.h b/lz4hc_encoder.h
deleted file mode 100644
index 49a0f231..00000000
--- a/lz4hc_encoder.h
+++ /dev/null
@@ -1,349 +0,0 @@
-/*
-   LZ4 HC Encoder - Part of LZ4 HC algorithm
-   Copyright (C) 2011-2013, Yann Collet.
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
-*/
-
-/* lz4hc_encoder.h must be included into lz4hc.c
-   The objective of this file is to create a single LZ4 compression function source
-   which will be instanciated multiple times with minor variations
-   depending on a set of #define.
-*/
-
-//****************************
-// Check required defines
-//****************************
-
-#ifndef FUNCTION_NAME
-#  error "FUNTION_NAME is not defined"
-#endif
-
-
-//****************************
-// Local definitions
-//****************************
-#define COMBINED_NAME_RAW(n1,n2) n1 ## n2
-#define COMBINED_NAME(n1,n2) COMBINED_NAME_RAW(n1,n2)
-#define ENCODE_SEQUENCE_NAME COMBINED_NAME(FUNCTION_NAME,_encodeSequence)
-#ifdef LIMITED_OUTPUT
-#  define ENCODE_SEQUENCE(i,o,a,m,r,d) if (ENCODE_SEQUENCE_NAME(i,o,a,m,r,d)) return 0;
-#else
-#  define ENCODE_SEQUENCE(i,o,a,m,r,d) ENCODE_SEQUENCE_NAME(i,o,a,m,r)
-#endif
-
-//****************************
-// Function code
-//****************************
-
-forceinline int ENCODE_SEQUENCE_NAME (
-                       const BYTE** ip, 
-                       BYTE** op, 
-                       const BYTE** anchor, 
-                       int matchLength, 
-                       const BYTE* ref
-#ifdef LIMITED_OUTPUT
-                      ,BYTE* oend
-#endif
-                       )
-{
-    int length, len; 
-    BYTE* token;
-
-    // Encode Literal length
-    length = (int)(*ip - *anchor);
-    token = (*op)++;
-#ifdef LIMITED_OUTPUT
-    if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; 		// Check output limit
-#endif
-    if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; } 
-    else *token = (BYTE)(length<<ML_BITS);
-
-    // Copy Literals
-    LZ4_BLINDCOPY(*anchor, *op, length);
-
-    // Encode Offset
-    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
-
-    // Encode MatchLength
-    length = (int)(matchLength-MINMATCH);
-#ifdef LIMITED_OUTPUT
-    if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; 		// Check output limit
-#endif
-    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } 
-    else *token += (BYTE)(length);	
-
-    // Prepare next loop
-    *ip += matchLength;
-    *anchor = *ip; 
-
-    return 0;
-}
-
-
-int COMBINED_NAME(FUNCTION_NAME,_continue) (
-                 void* ctxvoid,
-                 const char* source, 
-                 char* dest,
-                 int inputSize
-#ifdef LIMITED_OUTPUT
-                ,int maxOutputSize
-#endif
-                )
-{
-    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
-    const BYTE* ip = (const BYTE*) source;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = (iend - LASTLITERALS);
-
-    BYTE* op = (BYTE*) dest;
-#ifdef LIMITED_OUTPUT
-    BYTE* const oend = op + maxOutputSize;
-#endif
-
-    int	ml, ml2, ml3, ml0;
-    const BYTE* ref=NULL;
-    const BYTE* start2=NULL;
-    const BYTE* ref2=NULL;
-    const BYTE* start3=NULL;
-    const BYTE* ref3=NULL;
-    const BYTE* start0;
-    const BYTE* ref0;
-
-    // Ensure blocks follow each other
-    if (ip != ctx->end) return 0;
-    ctx->end += inputSize;
-
-    ip++;
-
-    // Main Loop
-    while (ip < mflimit)
-    {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
-        if (!ml) { ip++; continue; }
-
-        // saved, in case we would skip too much
-        start0 = ip;
-        ref0 = ref;
-        ml0 = ml;
-
-_Search2:
-        if (ip+ml < mflimit)
-            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
-        else ml2 = ml;
-
-        if (ml2 == ml)  // No better match
-        {
-            ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
-            continue;
-        }
-
-        if (start0 < ip)
-        {
-            if (start2 < ip + ml0)   // empirical
-            {
-                ip = start0;
-                ref = ref0;
-                ml = ml0;
-            }
-        }
-
-        // Here, start0==ip
-        if ((start2 - ip) < 3)   // First Match too small : removed
-        {
-            ml = ml2;
-            ip = start2;
-            ref =ref2;
-            goto _Search2;
-        }
-
-_Search3:
-        // Currently we have :
-        // ml2 > ml1, and
-        // ip1+3 <= ip2 (usually < ip1+ml1)
-        if ((start2 - ip) < OPTIMAL_ML)
-        {
-            int correction;
-            int new_ml = ml;
-            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
-            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
-            correction = new_ml - (int)(start2 - ip);
-            if (correction > 0)
-            {
-                start2 += correction;
-                ref2 += correction;
-                ml2 -= correction;
-            }
-        }
-        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
-
-        if (start2 + ml2 < mflimit)
-            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
-        else ml3 = ml2;
-
-        if (ml3 == ml2) // No better match : 2 sequences to encode
-        {
-            // ip & ref are known; Now for ml
-            if (start2 < ip+ml)  ml = (int)(start2 - ip);
-            // Now, encode 2 sequences
-            ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
-            ip = start2;
-            ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend);
-            continue;
-        }
-
-        if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
-        {
-            if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
-            {
-                if (start2 < ip+ml)
-                {
-                    int correction = (int)(ip+ml - start2);
-                    start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
-                    if (ml2 < MINMATCH)
-                    {
-                        start2 = start3;
-                        ref2 = ref3;
-                        ml2 = ml3;
-                    }
-                }
-
-                ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
-                ip  = start3;
-                ref = ref3;
-                ml  = ml3;
-
-                start0 = start2;
-                ref0 = ref2;
-                ml0 = ml2;
-                goto _Search2;
-            }
-
-            start2 = start3;
-            ref2 = ref3;
-            ml2 = ml3;
-            goto _Search3;
-        }
-
-        // OK, now we have 3 ascending matches; let's write at least the first one
-        // ip & ref are known; Now for ml
-        if (start2 < ip+ml)
-        {
-            if ((start2 - ip) < (int)ML_MASK)
-            {
-                int correction;
-                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
-                correction = ml - (int)(start2 - ip);
-                if (correction > 0)
-                {
-                    start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
-                }
-            }
-            else
-            {
-                ml = (int)(start2 - ip);
-            }
-        }
-        ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
-
-        ip = start2;
-        ref = ref2;
-        ml = ml2;
-
-        start2 = start3;
-        ref2 = ref3;
-        ml2 = ml3;
-
-        goto _Search3;
-
-    }
-
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-#ifdef LIMITED_OUTPUT
-        if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;  // Check output limit
-#endif
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    } 
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-int FUNCTION_NAME (const char* source, 
-                 char* dest,
-                 int inputSize
-#ifdef LIMITED_OUTPUT
-                ,int maxOutputSize
-#endif
-                )
-{
-    void* ctx = LZ4_createHC(source);
-    int result;
-    if (ctx==NULL) return 0;
-#ifdef LIMITED_OUTPUT
-    result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize, maxOutputSize);
-#else
-    result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize);
-#endif
-    LZ4_freeHC(ctx);
-
-    return result;
-}
-
-
-//****************************
-// Clean defines
-//****************************
-
-// Required defines
-#undef FUNCTION_NAME
-
-// Locally Generated
-#undef ENCODE_SEQUENCE
-#undef ENCODE_SEQUENCE_NAME
-
-// Optional defines
-#ifdef LIMITED_OUTPUT
-#undef LIMITED_OUTPUT
-#endif
-
-