From 03903f57012054852c0c26daca7131a130bb5cbf Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 29 Dec 2021 18:51:03 -0800 Subject: [PATCH] fixed minor compression difference in btlazy2 subtle dependency on sumtype numeric representation --- contrib/seekable_format/zstdseek_decompress.c | 53 ++++++++++++++++++- lib/compress/zstd_lazy.c | 24 ++++----- tests/regression/.gitignore | 1 + 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index 5eed02495..0848d2519 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -23,13 +23,64 @@ # endif #endif +/* ************************************************************ +* Detect POSIX version +* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows +* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX +* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION +* Value of PLATFORM_POSIX_VERSION can be forced on command line +***************************************************************/ +#ifndef PLATFORM_POSIX_VERSION + +# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ + /* exception rule : force posix version to 200112L, + * note: it's better to use unistd.h's _POSIX_VERSION whenever possible */ +# define PLATFORM_POSIX_VERSION 200112L + +/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html). + * note : there is no simple way to know in advance if <unistd.h> is present or not on target system, + * Posix specification mandates its presence and its content, but target system must respect this spec. 
+ * It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like + * otherwise it will block preprocessing stage. + * The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h> + */ +# elif !defined(_WIN32) \ + && ( defined(__unix__) || defined(__unix) \ + || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) ) + +# if defined(__linux__) || defined(__linux) || defined(__CYGWIN__) +# ifndef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */ +# endif +# endif +# include <unistd.h> /* declares _POSIX_VERSION */ +# if defined(_POSIX_VERSION) /* POSIX compliant */ +# define PLATFORM_POSIX_VERSION _POSIX_VERSION +# else +# define PLATFORM_POSIX_VERSION 1 +# endif + +# ifdef __UCLIBC__ +# ifndef __USE_MISC +# define __USE_MISC /* enable st_mtim on uclibc */ +# endif +# endif + +# else /* non-unix target platform (like Windows) */ +# define PLATFORM_POSIX_VERSION 0 +# endif + +#endif /* PLATFORM_POSIX_VERSION */ + + /* ************************************************************ * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW ***************************************************************/ #if defined(_MSC_VER) && _MSC_VER >= 1400 # define LONG_SEEK _fseeki64 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ -# define LONG_SEEK fseeko +# define LONG_SEEK fseeko #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) # define LONG_SEEK fseeko64 #elif defined(_WIN32) && !defined(__DJGPP__) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 2a0cfc893..47c0687c9 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -230,7 +230,7 @@ ZSTD_DUBT_findBetterDictMatch ( static size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* 
const iend, - size_t* offsetPtr, + size_t* offBasePtr, U32 const mls, const ZSTD_dictMode_e dictMode) { @@ -327,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, if (matchLength > bestLength) { if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) ) + bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex); if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ if (dictMode == ZSTD_dictMatchState) { nbCompares = 0; /* in addition to avoiding checking any @@ -361,16 +361,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, if (dictMode == ZSTD_dictMatchState && nbCompares) { bestLength = ZSTD_DUBT_findBetterDictMatch( ms, ip, iend, - offsetPtr, bestLength, nbCompares, + offBasePtr, bestLength, nbCompares, mls, dictMode); } assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ if (bestLength >= MINMATCH) { - U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex; + U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + curr, (U32)bestLength, (U32)*offBasePtr, mIndex); } return bestLength; } @@ -381,14 +381,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, FORCE_INLINE_TEMPLATE size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, + size_t* offBasePtr, const U32 mls /* template */, const ZSTD_dictMode_e dictMode) { DEBUGLOG(7, 
"ZSTD_BtFindBestMatch"); if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ ZSTD_updateDUBT(ms, ip, iLimit, mls); - return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode); } /*********************************** @@ -1337,10 +1337,10 @@ typedef struct { static size_t ZSTD_BtFindBestMatch_##dictMode##_##mls( \ ZSTD_matchState_t* ms, \ const BYTE* ip, const BYTE* const iLimit, \ - size_t* offsetPtr) \ + size_t* offBasePtr) \ { \ assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ - return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \ + return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode);\ } \ static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = { \ ZSTD_BtFindBestMatch_##dictMode##_##mls \ @@ -1959,7 +1959,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( } } /* search match, depth 1 */ - { size_t ofbCandidate=999999999; + { size_t ofbCandidate = 999999999; size_t const ml2 = searchMax(ms, ip, iend, &ofbCandidate); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); @@ -1991,7 +1991,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( } } /* search match, depth 2 */ - { size_t ofbCandidate=999999999; + { size_t ofbCandidate = 999999999; size_t const ml2 = searchMax(ms, ip, iend, &ofbCandidate); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); diff --git a/tests/regression/.gitignore b/tests/regression/.gitignore index 1b2618f41..3da209d40 100644 --- a/tests/regression/.gitignore +++ b/tests/regression/.gitignore @@ -1,3 +1,4 @@ # regression test artifacts data-cache +cache test