Merge pull request #1443 from lz4/dictAPI

promote dictionary API to stable
2024-11-23 09:54:00 +08:00 · 2024-07-16 16:08:28 -07:00 · 2024-07-16 16:08:28 -07:00 · 76c48c1e43
commit 76c48c1e43
parent 30277ada95 8017ca0ed1
2 changed files with 142 additions and 132 deletions
--- a/lib/lz4.h
+++ b/lib/lz4.h
@ -129,8 +129,8 @@ extern "C" {

 /*------   Version   ------*/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  5    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_MINOR   10    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */

 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)

@ -370,7 +370,7 @@ LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
 */
 LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);

-/*! LZ4_loadDictSlow() : v1.9.5+
+/*! LZ4_loadDictSlow() : v1.10.0+
 *  Same as LZ4_loadDict(),
 *  but uses a bit more cpu to reference the dictionary content more thoroughly.
 *  This is expected to slightly improve compression ratio.
@ -379,6 +379,42 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in
 */
 LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);

+/*! LZ4_attach_dictionary() : stable since v1.10.0
+ *
+ *  This allows efficient re-use of a static dictionary multiple times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references @dictionaryStream in-place.
+ *
+ *  Several assumptions are made about the state of @dictionaryStream.
+ *  Currently, only states which have been prepared by LZ4_loadDict() or
+ *  LZ4_loadDictSlow() should be expected to work.
+ *
+ *  Alternatively, the provided @dictionaryStream may be NULL,
+ *  in which case any existing dictionary stream is unset.
+ *
+ *  If a dictionary is provided, it replaces any pre-existing stream history.
+ *  The dictionary contents are the only history that can be referenced and
+ *  logically immediately precede the data compressed in the first subsequent
+ *  compression call.
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared.
+ * @dictionaryStream stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the completion of the compression session.
+ *
+ *  Note: there is no equivalent LZ4_attach_*() method on the decompression side
+ *  because there is no initialization cost, hence no need to share the cost across multiple sessions.
+ *  To decompress LZ4 blocks using dictionary, attached or not,
+ *  just employ the regular LZ4_setStreamDecode() for streaming,
+ *  or the stateless LZ4_decompress_safe_usingDict() for one-shot decompression.
+ */
+LZ4LIB_API void
+LZ4_attach_dictionary(LZ4_stream_t* workingStream,
+                const LZ4_stream_t* dictionaryStream);
+
 /*! LZ4_compress_fast_continue() :
 *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
 * 'dst' buffer must be already allocated.
@ -580,37 +616,6 @@ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const c
 */
 int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration);

-/*! LZ4_attach_dictionary() :
- *  This is an experimental API that allows
- *  efficient use of a static dictionary many times.
- *
- *  Rather than re-loading the dictionary buffer into a working context before
- *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
- *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
- *  in which the working stream references the dictionary stream in-place.
- *
- *  Several assumptions are made about the state of the dictionary stream.
- *  Currently, only streams which have been prepared by LZ4_loadDict() should
- *  be expected to work.
- *
- *  Alternatively, the provided dictionaryStream may be NULL,
- *  in which case any existing dictionary stream is unset.
- *
- *  If a dictionary is provided, it replaces any pre-existing stream history.
- *  The dictionary contents are the only history that can be referenced and
- *  logically immediately precede the data compressed in the first subsequent
- *  compression call.
- *
- *  The dictionary will only remain attached to the working stream through the
- *  first compression call, at the end of which it is cleared. The dictionary
- *  stream (and source buffer) must remain in-place / accessible / unchanged
- *  through the completion of the first compression call on the stream.
- */
-LZ4LIB_STATIC_API void
-LZ4_attach_dictionary(LZ4_stream_t* workingStream,
-                const LZ4_stream_t* dictionaryStream);
-
-
 /*! In-place compression and decompression
 *
 * It's possible to have input and output sharing the same buffer,
--- a/lib/lz4frame.h
+++ b/lib/lz4frame.h
@ -513,6 +513,109 @@ LZ4F_decompress(LZ4F_dctx* dctx,
 LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx);   /* always successful */


+/**********************************
+ *  Dictionary compression API
+ *********************************/
+
+/* A Dictionary is useful for the compression of small messages (KB range).
+ * It dramatically improves compression efficiency.
+ *
+ * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful.
+ * Better results are generally achieved by using Zstandard's Dictionary Builder
+ * to generate a high-quality dictionary from a set of samples.
+ *
+ * The same dictionary will have to be used on the decompression side
+ * for decoding to be successful.
+ * To help identify the correct dictionary at decoding stage,
+ * the frame header allows optional embedding of a dictID field.
+ */
+
+/*! LZ4F_compressBegin_usingDict() : stable since v1.10
+ *  Inits dictionary compression streaming, and writes the frame header into dstBuffer.
+ * @dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * @prefsPtr is optional : one may provide NULL as argument,
+ *  however, it's the only way to provide dictID in the frame header.
+ * @dictBuffer must outlive the compression session.
+ * @return : number of bytes written into dstBuffer for the header,
+ *           or an error code (which can be tested using LZ4F_isError())
+ *  NOTE: The LZ4Frame spec allows each independent block to be compressed with the dictionary,
+ *        but this entry supports a more limited scenario, where only the first block uses the dictionary.
+ *        This is still useful for small data, which only need one block anyway.
+ *        For larger inputs, one may be more interested in LZ4F_compressFrame_usingCDict() below.
+ */
+LZ4FLIB_API size_t
+LZ4F_compressBegin_usingDict(LZ4F_cctx* cctx,
+                            void* dstBuffer, size_t dstCapacity,
+                      const void* dictBuffer, size_t dictSize,
+                      const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_decompress_usingDict() : stable since v1.10
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+**  It must remain accessible throughout the entire frame decoding. */
+LZ4FLIB_API size_t
+LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
+                          void* dstBuffer, size_t* dstSizePtr,
+                    const void* srcBuffer, size_t* srcSizePtr,
+                    const void* dict, size_t dictSize,
+                    const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+/*****************************************
+ *  Bulk processing dictionary compression
+ *****************************************/
+
+/* Loading a dictionary has a cost, since it involves construction of tables.
+ * The Bulk processing dictionary API makes it possible to share this cost
+ * over an arbitrary number of compression jobs, even concurrently,
+ * markedly improving compression latency for these cases.
+ *
+ * Note that there is no corresponding bulk API for the decompression side,
+ * because dictionary does not carry any initialization cost for decompression.
+ * Use the regular LZ4F_decompress_usingDict() there.
+ */
+typedef struct LZ4F_CDict_s LZ4F_CDict;
+
+/*! LZ4_createCDict() : stable since v1.10
+ *  When compressing multiple messages / blocks using the same dictionary, it's recommended to initialize it just once.
+ *  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after LZ4_CDict creation, since its content is copied within CDict. */
+LZ4FLIB_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+
+/*! LZ4_compressFrame_usingCDict() : stable since v1.10
+ *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+ * @cctx must point to a context created by LZ4F_createCompressionContext().
+ *  If @cdict==NULL, compress without a dictionary.
+ * @dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  If this condition is not respected, function will fail (@return an errorCode).
+ *  The LZ4F_preferences_t structure is optional : one may provide NULL as argument,
+ *  but it's not recommended, as it's the only way to provide @dictID in the frame header.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ *  Note: for larger inputs generating multiple independent blocks,
+ *        this entry point uses the dictionary for each block. */
+LZ4FLIB_API size_t
+LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                        const LZ4F_CDict* cdict,
+                        const LZ4F_preferences_t* preferencesPtr);
+
+/*! LZ4F_compressBegin_usingCDict() : stable since v1.10
+ *  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+ * @dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * @prefsPtr is optional : one may provide NULL as argument,
+ *  note however that it's the only way to insert a @dictID in the frame header.
+ * @cdict must outlive the compression session.
+ * @return : number of bytes written into dstBuffer for the header,
+ *           or an error code, which can be tested using LZ4F_isError(). */
+LZ4FLIB_API size_t
+LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
+                              void* dstBuffer, size_t dstCapacity,
+                        const LZ4F_CDict* cdict,
+                        const LZ4F_preferences_t* prefsPtr);
+

 #if defined (__cplusplus)
 }
@ -612,104 +715,6 @@ LZ4F_uncompressedUpdate(LZ4F_cctx* cctx,
                  const void* srcBuffer, size_t srcSize,
                  const LZ4F_compressOptions_t* cOptPtr);

-/**********************************
- *  Dictionary compression API
- *********************************/
-
-/* A Dictionary is useful for the compression of small messages (KB range).
- * It dramatically improves compression efficiency.
- *
- * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful.
- * Better results are generally achieved by using Zstandard's Dictionary Builder
- * to generate a high-quality dictionary from a set of samples.
- *
- * The same dictionary will have to be used on the decompression side
- * for decoding to be successful.
- * To help identify the correct dictionary at decoding stage,
- * the frame header allows optional embedding of a dictID field.
- */
-
-/*! LZ4F_compressBegin_usingDict() :
- *  Inits dictionary compression streaming, and writes the frame header into dstBuffer.
- * `dstCapacity` must be >= LZ4F_HEADER_SIZE_MAX bytes.
- * `prefsPtr` is optional : you may provide NULL as argument,
- *  however, it's the only way to provide dictID in the frame header.
- * `dictBuffer` must outlive the compression session.
- * @return : number of bytes written into dstBuffer for the header,
- *           or an error code (which can be tested using LZ4F_isError())
- *  NOTE: this entry point doesn't fully exploit the spec,
- *        which allows each independent block to be compressed with the dictionary.
- *        Currently, only the first block uses the dictionary.
- *        This is still technically compliant, but less efficient for large inputs.
- */
-LZ4FLIB_STATIC_API size_t
-LZ4F_compressBegin_usingDict(LZ4F_cctx* cctx,
-                            void* dstBuffer, size_t dstCapacity,
-                      const void* dictBuffer, size_t dictSize,
-                      const LZ4F_preferences_t* prefsPtr);
-
-/*! LZ4F_decompress_usingDict() :
- *  Same as LZ4F_decompress(), using a predefined dictionary.
- *  Dictionary is used "in place", without any preprocessing.
-**  It must remain accessible throughout the entire frame decoding. */
-LZ4FLIB_STATIC_API size_t
-LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
-                          void* dstBuffer, size_t* dstSizePtr,
-                    const void* srcBuffer, size_t* srcSizePtr,
-                    const void* dict, size_t dictSize,
-                    const LZ4F_decompressOptions_t* decompressOptionsPtr);
-
-/**********************************
- *  Bulk processing dictionary API
- *********************************/
-
-/* Loading a dictionary has a cost, since it involves construction of tables.
- * The Bulk processing dictionary API makes it possible to share this cost
- * over an arbitrary number of compression jobs, even concurrently,
- * markedly improving compression latency for these cases.
- */
-typedef struct LZ4F_CDict_s LZ4F_CDict;
-
-/*! LZ4_createCDict() :
- *  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once.
- *  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
- *  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
-LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
-LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
-
-/*! LZ4_compressFrame_usingCDict() :
- *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
- *  cctx must point to a context created by LZ4F_createCompressionContext().
- *  If cdict==NULL, compress without a dictionary.
- *  dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
- *  If this condition is not respected, function will fail (@return an errorCode).
- *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
- *  but it's not recommended, as it's the only way to provide dictID in the frame header.
- * @return : number of bytes written into dstBuffer.
- *           or an error code if it fails (can be tested using LZ4F_isError()) */
-LZ4FLIB_STATIC_API size_t
-LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
-                              void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize,
-                        const LZ4F_CDict* cdict,
-                        const LZ4F_preferences_t* preferencesPtr);
-
-/*! LZ4F_compressBegin_usingCDict() :
- *  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
- * `dstCapacity` must be >= LZ4F_HEADER_SIZE_MAX bytes.
- * `prefsPtr` is optional : you may provide NULL as argument,
- *  however, it's the only way to provide dictID in the frame header.
- * `cdict` must outlive the compression session.
- * @return : number of bytes written into dstBuffer for the header,
- *           or an error code (which can be tested using LZ4F_isError()) */
-LZ4FLIB_STATIC_API size_t
-LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
-                              void* dstBuffer, size_t dstCapacity,
-                        const LZ4F_CDict* cdict,
-                        const LZ4F_preferences_t* prefsPtr);
-
-
 /**********************************
 *  Custom memory allocation
 *********************************/