zstd/programs/fileio.c
2024-03-12 11:27:42 -07:00

3471 lines
132 KiB
C

/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* *************************************
* Compiler Options
***************************************/
#ifdef _MSC_VER /* Visual */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4204) /* non-constant aggregate initializer */
#endif
#if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
#endif
/*-*************************************
* Includes
***************************************/
#include "platform.h" /* Large Files support, SET_BINARY_MODE */
#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
#include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* strcmp, strlen */
#include <time.h> /* clock_t, to measure process time */
#include <fcntl.h> /* O_WRONLY */
#include <assert.h>
#include <errno.h> /* errno */
#include <limits.h> /* INT_MAX */
#include <signal.h>
#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
#if defined (_MSC_VER)
# include <sys/stat.h>
# include <io.h>
#endif
#include "fileio.h"
#include "fileio_asyncio.h"
#include "fileio_common.h"
FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
#include "../lib/zstd.h"
#include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
# include <zlib.h>
# if !defined(z_const)
# define z_const
# endif
#endif
#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
# include <lzma.h>
#endif
#define LZ4_MAGICNUMBER 0x184D2204
#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
# define LZ4F_ENABLE_OBSOLETE_ENUMS
# include <lz4frame.h>
# include <lz4.h>
#endif
char const* FIO_zlibVersion(void)
{
#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
return zlibVersion();
#else
return "Unsupported";
#endif
}
char const* FIO_lz4Version(void)
{
#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
/* LZ4_versionString() added in v1.7.3 */
# if LZ4_VERSION_NUMBER >= 10703
return LZ4_versionString();
# else
# define ZSTD_LZ4_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
# define ZSTD_LZ4_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LZ4_VERSION)
return ZSTD_LZ4_VERSION_STRING;
# endif
#else
return "Unsupported";
#endif
}
char const* FIO_lzmaVersion(void)
{
#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
return lzma_version_string();
#else
return "Unsupported";
#endif
}
/*-*************************************
* Constants
***************************************/
#define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */
#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
#define FNSPACE 30
/* Default file permissions 0666 (modulated by umask) */
/* Temporary restricted file permissions are used when we're going to
* chmod/chown at the end of the operation. */
#if !defined(_WIN32)
/* These macros aren't defined on windows. */
#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
#define TEMPORARY_FILE_PERMISSIONS (S_IRUSR|S_IWUSR)
#else
#define DEFAULT_FILE_PERMISSIONS (0666)
#define TEMPORARY_FILE_PERMISSIONS (0600)
#endif
/*-************************************
* Signal (Ctrl-C trapping)
**************************************/
static const char* g_artefact = NULL;
static void INThandler(int sig)
{
assert(sig==SIGINT); (void)sig;
#if !defined(_MSC_VER)
signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */
#endif
if (g_artefact) {
assert(UTIL_isRegularFile(g_artefact));
remove(g_artefact);
}
DISPLAY("\n");
exit(2);
}
static void addHandler(char const* dstFileName)
{
if (UTIL_isRegularFile(dstFileName)) {
g_artefact = dstFileName;
signal(SIGINT, INThandler);
} else {
g_artefact = NULL;
}
}
/* Idempotent */
static void clearHandler(void)
{
if (g_artefact) signal(SIGINT, SIG_DFL);
g_artefact = NULL;
}
/*-*********************************************************
* Termination signal trapping (Print debug stack trace)
***********************************************************/
#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
# if (__has_feature(address_sanitizer))
# define BACKTRACE_ENABLE 0
# endif /* __has_feature(address_sanitizer) */
#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
# define BACKTRACE_ENABLE 0
#endif
#if !defined(BACKTRACE_ENABLE)
/* automatic detector : backtrace enabled by default on linux+glibc and osx */
# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
|| (defined(__APPLE__) && defined(__MACH__))
# define BACKTRACE_ENABLE 1
# else
# define BACKTRACE_ENABLE 0
# endif
#endif
/* note : after this point, BACKTRACE_ENABLE is necessarily defined */
#if BACKTRACE_ENABLE
#include <execinfo.h> /* backtrace, backtrace_symbols */
#define MAX_STACK_FRAMES 50
static void ABRThandler(int sig) {
const char* name;
void* addrlist[MAX_STACK_FRAMES];
char** symbollist;
int addrlen, i;
switch (sig) {
case SIGABRT: name = "SIGABRT"; break;
case SIGFPE: name = "SIGFPE"; break;
case SIGILL: name = "SIGILL"; break;
case SIGINT: name = "SIGINT"; break;
case SIGSEGV: name = "SIGSEGV"; break;
default: name = "UNKNOWN";
}
DISPLAY("Caught %s signal, printing stack:\n", name);
/* Retrieve current stack addresses. */
addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
if (addrlen == 0) {
DISPLAY("\n");
return;
}
/* Create readable strings to each frame. */
symbollist = backtrace_symbols(addrlist, addrlen);
/* Print the stack trace, excluding calls handling the signal. */
for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
DISPLAY("%s\n", symbollist[i]);
}
free(symbollist);
/* Reset and raise the signal so default handler runs. */
signal(sig, SIG_DFL);
raise(sig);
}
#endif
void FIO_addAbortHandler(void)
{
#if BACKTRACE_ENABLE
signal(SIGABRT, ABRThandler);
signal(SIGFPE, ABRThandler);
signal(SIGILL, ABRThandler);
signal(SIGSEGV, ABRThandler);
signal(SIGBUS, ABRThandler);
#endif
}
/*-*************************************
* Parameters: FIO_ctx_t
***************************************/
/* typedef'd to FIO_ctx_t within fileio.h */
struct FIO_ctx_s {
/* file i/o info */
int nbFilesTotal;
int hasStdinInput;
int hasStdoutOutput;
/* file i/o state */
int currFileIdx;
int nbFilesProcessed;
size_t totalBytesInput;
size_t totalBytesOutput;
};
static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx)
{
return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3;
}
static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx)
{
int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1);
assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0);
return shouldDisplay;
}
/*-*************************************
* Parameters: Initialization
***************************************/
#define FIO_OVERLAP_LOG_NOTSET 9999
#define FIO_LDM_PARAM_NOTSET 9999
FIO_prefs_t* FIO_createPreferences(void)
{
FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
ret->compressionType = FIO_zstdCompression;
ret->overwrite = 0;
ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
ret->dictIDFlag = 1;
ret->checksumFlag = 1;
ret->removeSrcFile = 0;
ret->memLimit = 0;
ret->nbWorkers = 1;
ret->blockSize = 0;
ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
ret->adaptiveMode = 0;
ret->rsyncable = 0;
ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
ret->ldmFlag = 0;
ret->ldmHashLog = 0;
ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
ret->streamSrcSize = 0;
ret->targetCBlockSize = 0;
ret->srcSizeHint = 0;
ret->testMode = 0;
ret->literalCompressionMode = ZSTD_ps_auto;
ret->excludeCompressedFiles = 0;
ret->allowBlockDevices = 0;
ret->asyncIO = AIO_supported();
ret->passThrough = -1;
return ret;
}
FIO_ctx_t* FIO_createContext(void)
{
FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
ret->currFileIdx = 0;
ret->hasStdinInput = 0;
ret->hasStdoutOutput = 0;
ret->nbFilesTotal = 1;
ret->nbFilesProcessed = 0;
ret->totalBytesInput = 0;
ret->totalBytesOutput = 0;
return ret;
}
void FIO_freePreferences(FIO_prefs_t* const prefs)
{
free(prefs);
}
void FIO_freeContext(FIO_ctx_t* const fCtx)
{
free(fCtx);
}
/*-*************************************
* Parameters: Display Options
***************************************/
void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
/*-*************************************
* Parameters: Setters
***************************************/
/* FIO_prefs_t functions */
void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; }
void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; }
void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse) { prefs->sparseFileSupport = sparse; }
void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; }
void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; }
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag) { prefs->removeSrcFile = (flag!=0); }
void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; }
void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
#ifndef ZSTD_MULTITHREAD
if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
#endif
prefs->nbWorkers = nbWorkers;
}
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
if (blockSize && prefs->nbWorkers==0)
DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
prefs->blockSize = blockSize;
}
void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){
if (overlapLog && prefs->nbWorkers==0)
DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
prefs->overlapLog = overlapLog;
}
void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt) {
if ((adapt>0) && (prefs->nbWorkers==0))
EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
prefs->adaptiveMode = adapt;
}
void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
prefs->useRowMatchFinder = useRowMatchFinder;
}
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
if ((rsyncable>0) && (prefs->nbWorkers==0))
EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
prefs->rsyncable = rsyncable;
}
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
prefs->streamSrcSize = streamSrcSize;
}
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
prefs->targetCBlockSize = targetCBlockSize;
}
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
}
void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
prefs->testMode = (testMode!=0);
}
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_paramSwitch_e mode) {
prefs->literalCompressionMode = mode;
}
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
{
#ifndef ZSTD_NOCOMPRESS
assert(minCLevel >= ZSTD_minCLevel());
#endif
prefs->minAdaptLevel = minCLevel;
}
void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
{
prefs->maxAdaptLevel = maxCLevel;
}
void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) {
prefs->ldmFlag = (ldmFlag>0);
}
void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) {
prefs->ldmHashLog = ldmHashLog;
}
void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) {
prefs->ldmMinMatch = ldmMinMatch;
}
void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) {
prefs->ldmBucketSizeLog = ldmBucketSizeLog;
}
void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
prefs->ldmHashRateLog = ldmHashRateLog;
}
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
{
prefs->patchFromMode = value != 0;
}
void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
{
prefs->contentSize = value != 0;
}
void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value) {
#ifdef ZSTD_MULTITHREAD
prefs->asyncIO = value;
#else
(void) prefs;
(void) value;
DISPLAYLEVEL(2, "Note : asyncio is disabled (lack of multithreading support) \n");
#endif
}
void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) {
prefs->passThrough = (value != 0);
}
void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value)
{
prefs->mmapDict = value;
}
/* FIO_ctx_t functions */
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
fCtx->hasStdoutOutput = value;
}
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
{
fCtx->nbFilesTotal = value;
}
void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) {
size_t i = 0;
for ( ; i < filenames->tableSize; ++i) {
if (!strcmp(stdinmark, filenames->fileNames[i])) {
fCtx->hasStdinInput = 1;
return;
}
}
}
/*-*************************************
* Functions
***************************************/
/** FIO_removeFile() :
* @result : Unlink `fileName`, even if it's read-only */
static int FIO_removeFile(const char* path)
{
stat_t statbuf;
if (!UTIL_stat(path, &statbuf)) {
DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
return 0;
}
if (!UTIL_isRegularFileStat(&statbuf)) {
DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
return 0;
}
#if defined(_WIN32)
/* windows doesn't allow remove read-only files,
* so try to make it writable first */
if (!(statbuf.st_mode & _S_IWRITE)) {
UTIL_chmod(path, &statbuf, _S_IWRITE);
}
#endif
return remove(path);
}
/** FIO_openSrcFile() :
* condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
* @result : FILE* to `srcFileName`, or NULL if it fails */
static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf)
{
int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
assert(srcFileName != NULL);
assert(statbuf != NULL);
if (!strcmp (srcFileName, stdinmark)) {
DISPLAYLEVEL(4,"Using stdin for input \n");
SET_BINARY_MODE(stdin);
return stdin;
}
if (!UTIL_stat(srcFileName, statbuf)) {
DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
srcFileName, strerror(errno));
return NULL;
}
if (!UTIL_isRegularFileStat(statbuf)
&& !UTIL_isFIFOStat(statbuf)
&& !(allowBlockDevices && UTIL_isBlockDevStat(statbuf))
) {
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
srcFileName);
return NULL;
}
{ FILE* const f = fopen(srcFileName, "rb");
if (f == NULL)
DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
return f;
}
}
/** FIO_openDstFile() :
* condition : `dstFileName` must be non-NULL.
* @result : FILE* to `dstFileName`, or NULL if it fails */
static FILE*
FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
const char* srcFileName, const char* dstFileName,
const int mode)
{
int isDstRegFile;
if (prefs->testMode) return NULL; /* do not open file in test mode */
assert(dstFileName != NULL);
if (!strcmp (dstFileName, stdoutmark)) {
DISPLAYLEVEL(4,"Using stdout for output \n");
SET_BINARY_MODE(stdout);
if (prefs->sparseFileSupport == 1) {
prefs->sparseFileSupport = 0;
DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
}
return stdout;
}
/* ensure dst is not the same as src */
if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
return NULL;
}
isDstRegFile = UTIL_isRegularFile(dstFileName); /* invoke once */
if (prefs->sparseFileSupport == 1) {
prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
if (!isDstRegFile) {
prefs->sparseFileSupport = 0;
DISPLAYLEVEL(4, "Sparse File Support is disabled when output is not a file \n");
}
}
if (isDstRegFile) {
/* Check if destination file already exists */
#if !defined(_WIN32)
/* this test does not work on Windows :
* `NUL` and `nul` are detected as regular files */
if (!strcmp(dstFileName, nulmark)) {
EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
dstFileName);
}
#endif
if (!prefs->overwrite) {
if (g_display_prefs.displayLevel <= 1) {
/* No interaction possible */
DISPLAYLEVEL(1, "zstd: %s already exists; not overwritten \n",
dstFileName);
return NULL;
}
DISPLAY("zstd: %s already exists; ", dstFileName);
if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput))
return NULL;
}
/* need to unlink */
FIO_removeFile(dstFileName);
}
{
#if defined(_WIN32)
/* Windows requires opening the file as a "binary" file to avoid
* mangling. This macro doesn't exist on unix. */
const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
const int fd = _open(dstFileName, openflags, mode);
FILE* f = NULL;
if (fd != -1) {
f = _fdopen(fd, "wb");
}
#else
const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
const int fd = open(dstFileName, openflags, mode);
FILE* f = NULL;
if (fd != -1) {
f = fdopen(fd, "wb");
}
#endif
if (f == NULL) {
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
} else {
/* An increased buffer size can provide a significant performance
* boost on some platforms. Note that providing a NULL buf with a
* size that's not 0 is not defined in ANSI C, but is defined in an
* extension. There are three possibilities here:
* 1. Libc supports the extended version and everything is good.
* 2. Libc ignores the size when buf is NULL, in which case
* everything will continue as if we didn't call `setvbuf()`.
* 3. We fail the call and execution continues but a warning
* message might be shown.
* In all cases due execution continues. For now, I believe that
* this is a more cost-effective solution than managing the buffers
* allocations ourselves (will require an API change).
*/
if (setvbuf(f, NULL, _IOFBF, 1 MB)) {
DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName);
}
}
return f;
}
}
/* FIO_getDictFileStat() :
*/
static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) {
assert(dictFileStat != NULL);
if (fileName == NULL) return;
if (!UTIL_stat(fileName, dictFileStat)) {
EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
}
if (!UTIL_isRegularFileStat(dictFileStat)) {
EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
}
}
/* FIO_setDictBufferMalloc() :
* allocates a buffer, pointed by `dict->dictBuffer`,
* loads `filename` content into it, up to DICTSIZE_MAX bytes.
* @return : loaded size
* if fileName==NULL, returns 0 and a NULL pointer
*/
static size_t FIO_setDictBufferMalloc(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
{
FILE* fileHandle;
U64 fileSize;
void** bufferPtr = &dict->dictBuffer;
assert(bufferPtr != NULL);
assert(dictFileStat != NULL);
*bufferPtr = NULL;
if (fileName == NULL) return 0;
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
fileHandle = fopen(fileName, "rb");
if (fileHandle == NULL) {
EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
}
fileSize = UTIL_getFileSizeStat(dictFileStat);
{
size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
if (fileSize > dictSizeMax) {
EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
}
}
*bufferPtr = malloc((size_t)fileSize);
if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
{ size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
if (readSize != fileSize) {
EXM_THROW(35, "Error reading dictionary file %s : %s",
fileName, strerror(errno));
}
}
fclose(fileHandle);
return (size_t)fileSize;
}
#if (PLATFORM_POSIX_VERSION > 0)
#include <sys/mman.h>
static void FIO_munmap(FIO_Dict_t* dict)
{
munmap(dict->dictBuffer, dict->dictBufferSize);
dict->dictBuffer = NULL;
dict->dictBufferSize = 0;
}
static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
{
int fileHandle;
U64 fileSize;
void** bufferPtr = &dict->dictBuffer;
assert(bufferPtr != NULL);
assert(dictFileStat != NULL);
*bufferPtr = NULL;
if (fileName == NULL) return 0;
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
fileHandle = open(fileName, O_RDONLY);
if (fileHandle == -1) {
EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
}
fileSize = UTIL_getFileSizeStat(dictFileStat);
{
size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
if (fileSize > dictSizeMax) {
EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
}
}
*bufferPtr = mmap(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0);
if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
close(fileHandle);
return (size_t)fileSize;
}
#elif defined(_MSC_VER) || defined(_WIN32)
#include <windows.h>
static void FIO_munmap(FIO_Dict_t* dict)
{
UnmapViewOfFile(dict->dictBuffer);
CloseHandle(dict->dictHandle);
dict->dictBuffer = NULL;
dict->dictBufferSize = 0;
}
static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
{
HANDLE fileHandle, mapping;
U64 fileSize;
void** bufferPtr = &dict->dictBuffer;
assert(bufferPtr != NULL);
assert(dictFileStat != NULL);
*bufferPtr = NULL;
if (fileName == NULL) return 0;
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
fileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
if (fileHandle == INVALID_HANDLE_VALUE) {
EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
}
fileSize = UTIL_getFileSizeStat(dictFileStat);
{
size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
if (fileSize > dictSizeMax) {
EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
}
}
mapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
if (mapping == NULL) {
EXM_THROW(35, "Couldn't map dictionary %s: %s", fileName, strerror(errno));
}
*bufferPtr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (DWORD)fileSize); /* we can only cast to DWORD here because dictSize <= 2GB */
if (*bufferPtr==NULL) EXM_THROW(36, "%s", strerror(errno));
dict->dictHandle = fileHandle;
return (size_t)fileSize;
}
#else
static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
{
return FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
}
static void FIO_munmap(FIO_Dict_t* dict) {
free(dict->dictBuffer);
dict->dictBuffer = NULL;
dict->dictBufferSize = 0;
}
#endif
static void FIO_freeDict(FIO_Dict_t* dict) {
if (dict->dictBufferType == FIO_mallocDict) {
free(dict->dictBuffer);
dict->dictBuffer = NULL;
dict->dictBufferSize = 0;
} else if (dict->dictBufferType == FIO_mmapDict) {
FIO_munmap(dict);
} else {
assert(0); /* Should not reach this case */
}
}
static void FIO_initDict(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat, FIO_dictBufferType_t dictBufferType) {
dict->dictBufferType = dictBufferType;
if (dict->dictBufferType == FIO_mallocDict) {
dict->dictBufferSize = FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
} else if (dict->dictBufferType == FIO_mmapDict) {
dict->dictBufferSize = FIO_setDictBufferMMap(dict, fileName, prefs, dictFileStat);
} else {
assert(0); /* Should not reach this case */
}
}
/* FIO_checkFilenameCollisions() :
* Checks for and warns if there are any files that would have the same output path
*/
int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
const char **filenameTableSorted, *prevElem, *filename;
unsigned u;
filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
if (!filenameTableSorted) {
DISPLAYLEVEL(1, "Allocation error during filename collision checking \n");
return 1;
}
for (u = 0; u < nbFiles; ++u) {
filename = strrchr(filenameTable[u], PATH_SEP);
if (filename == NULL) {
filenameTableSorted[u] = filenameTable[u];
} else {
filenameTableSorted[u] = filename+1;
}
}
qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
prevElem = filenameTableSorted[0];
for (u = 1; u < nbFiles; ++u) {
if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
DISPLAYLEVEL(2, "WARNING: Two files have same filename: %s\n", prevElem);
}
prevElem = filenameTableSorted[u];
}
free((void*)filenameTableSorted);
return 0;
}
static const char*
extractFilename(const char* path, char separator)
{
const char* search = strrchr(path, separator);
if (search == NULL) return path;
return search+1;
}
/* FIO_createFilename_fromOutDir() :
* Takes a source file name and specified output directory, and
* allocates memory for and returns a pointer to final path.
* This function never returns an error (it may abort() in case of pb)
*/
static char*
FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
{
const char* filenameStart;
char separator;
char* result;
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
separator = '\\';
#else
separator = '/';
#endif
filenameStart = extractFilename(path, separator);
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
#endif
result = (char*) calloc(1, strlen(outDirName) + 1 + strlen(filenameStart) + suffixLen + 1);
if (!result) {
EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
}
memcpy(result, outDirName, strlen(outDirName));
if (outDirName[strlen(outDirName)-1] == separator) {
memcpy(result + strlen(outDirName), filenameStart, strlen(filenameStart));
} else {
memcpy(result + strlen(outDirName), &separator, 1);
memcpy(result + strlen(outDirName) + 1, filenameStart, strlen(filenameStart));
}
return result;
}
/* FIO_highbit64() :
* gives position of highest bit.
* note : only works for v > 0 !
*/
static unsigned FIO_highbit64(unsigned long long v)
{
unsigned count = 0;
assert(v != 0);
v >>= 1;
while (v) { v >>= 1; count++; }
return count;
}
static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
unsigned long long const dictSize,
unsigned long long const maxSrcFileSize)
{
unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize));
unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX);
if (maxSize == UTIL_FILESIZE_UNKNOWN)
EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
assert(maxSize != UTIL_FILESIZE_UNKNOWN);
if (maxSize > maxWindowSize)
EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB));
FIO_setMemLimit(prefs, (unsigned)maxSize);
}
/* FIO_multiFilesConcatWarning() :
* This function handles logic when processing multiple files with -o or -c, displaying the appropriate warnings/prompts.
* Returns 1 if the console should abort, 0 if console should proceed.
*
* If output is stdout or test mode is active, check that `--rm` disabled.
*
* If there is just 1 file to process, zstd will proceed as usual.
* If each file get processed into its own separate destination file, proceed as usual.
*
* When multiple files are processed into a single output,
* display a warning message, then disable --rm if it's set.
*
* If -f is specified or if output is stdout, just proceed.
* If output is set with -o, prompt for confirmation.
*/
static int FIO_multiFilesConcatWarning(const FIO_ctx_t* fCtx, FIO_prefs_t* prefs, const char* outFileName, int displayLevelCutoff)
{
if (fCtx->hasStdoutOutput) {
if (prefs->removeSrcFile)
/* this should not happen ; hard fail, to protect user's data
* note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
EXM_THROW(43, "It's not allowed to remove input files when processed output is piped to stdout. "
"This scenario is not supposed to be possible. "
"This is a programming error. File an issue for it to be fixed.");
}
if (prefs->testMode) {
if (prefs->removeSrcFile)
/* this should not happen ; hard fail, to protect user's data
* note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
EXM_THROW(43, "Test mode shall not remove input files! "
"This scenario is not supposed to be possible. "
"This is a programming error. File an issue for it to be fixed.");
return 0;
}
if (fCtx->nbFilesTotal == 1) return 0;
assert(fCtx->nbFilesTotal > 1);
if (!outFileName) return 0;
if (fCtx->hasStdoutOutput) {
DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n");
} else {
DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName);
}
DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate original file names nor directory structure. \n")
/* multi-input into single output : --rm is not allowed */
if (prefs->removeSrcFile) {
DISPLAYLEVEL(2, "Since it's a destructive operation, input files will not be removed. \n");
prefs->removeSrcFile = 0;
}
if (fCtx->hasStdoutOutput) return 0;
if (prefs->overwrite) return 0;
/* multiple files concatenated into single destination file using -o without -f */
if (g_display_prefs.displayLevel <= displayLevelCutoff) {
/* quiet mode => no prompt => fail automatically */
DISPLAYLEVEL(1, "Concatenating multiple processed inputs into a single output loses file metadata. \n");
DISPLAYLEVEL(1, "Aborting. \n");
return 1;
}
/* normal mode => prompt */
return UTIL_requireUserConfirmation("Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
}
static ZSTD_inBuffer setInBuffer(const void* buf, size_t s, size_t pos)
{
ZSTD_inBuffer i;
i.src = buf;
i.size = s;
i.pos = pos;
return i;
}
static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos)
{
ZSTD_outBuffer o;
o.dst = buf;
o.size = s;
o.pos = pos;
return o;
}
#ifndef ZSTD_NOCOMPRESS
/* **********************************************************************
* Compression
************************************************************************/
typedef struct {
FIO_Dict_t dict;
const char* dictFileName;
stat_t dictFileStat;
ZSTD_CStream* cctx;
WritePoolCtx_t *writeCtx;
ReadPoolCtx_t *readCtx;
} cRess_t;
/** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
assert(hashLog > 1);
return hashLog - btScale;
}
static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
ZSTD_compressionParameters* comprParams,
unsigned long long const dictSize,
unsigned long long const maxSrcFileSize,
int cLevel)
{
unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1;
ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
if (!prefs->ldmFlag)
DISPLAYLEVEL(2, "long mode automatically triggered\n");
FIO_setLdmFlag(prefs, 1);
}
if (cParams.strategy >= ZSTD_btopt) {
DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n");
DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n");
}
}
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, unsigned long long const maxSrcFileSize,
int cLevel, ZSTD_compressionParameters comprParams) {
int useMMap = prefs->mmapDict == ZSTD_ps_enable;
int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
FIO_dictBufferType_t dictBufferType;
cRess_t ress;
memset(&ress, 0, sizeof(ress));
DISPLAYLEVEL(6, "FIO_createCResources \n");
ress.cctx = ZSTD_createCCtx();
if (ress.cctx == NULL)
EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
strerror(errno));
FIO_getDictFileStat(dictFileName, &ress.dictFileStat);
/* need to update memLimit before calling createDictBuffer
* because of memLimit check inside it */
if (prefs->patchFromMode) {
U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat);
unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
useMMap |= dictSize > prefs->memLimit;
FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
}
dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
FIO_initDict(&ress.dict, dictFileName, prefs, &ress.dictFileStat, dictBufferType); /* works with dictFileName==NULL */
ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());
/* Advanced parameters, including dictionary */
if (dictFileName && (ress.dict.dictBuffer==NULL))
EXM_THROW(32, "allocation error : can't create dictBuffer");
ress.dictFileName = dictFileName;
if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
/* max compressed block size */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
/* source size hint */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
}
if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
}
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
/* compression parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
/* multi-threading */
#ifdef ZSTD_MULTITHREAD
DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
}
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif
/* dictionary */
if (prefs->patchFromMode) {
CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
} else {
CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
}
return ress;
}
static void FIO_freeCResources(cRess_t* const ress)
{
FIO_freeDict(&(ress->dict));
AIO_WritePool_free(ress->writeCtx);
AIO_ReadPool_free(ress->readCtx);
ZSTD_freeCStream(ress->cctx); /* never fails */
}
#ifdef ZSTD_GZCOMPRESS
static unsigned long long
FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
const char* srcFileName, U64 const srcFileSize,
int compressionLevel, U64* readsize)
{
unsigned long long inFileSize = 0, outFileSize = 0;
z_stream strm;
IOJob_t *writeJob = NULL;
if (compressionLevel > Z_BEST_COMPRESSION)
compressionLevel = Z_BEST_COMPRESSION;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
{ int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
15 /* maxWindowLogSize */ + 16 /* gzip only */,
8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */
if (ret != Z_OK) {
EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
} }
writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
strm.next_in = 0;
strm.avail_in = 0;
strm.next_out = (Bytef*)writeJob->buffer;
strm.avail_out = (uInt)writeJob->bufferSize;
while (1) {
int ret;
if (strm.avail_in == 0) {
AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
if (ress->readCtx->srcBufferLoaded == 0) break;
inFileSize += ress->readCtx->srcBufferLoaded;
strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
}
{
size_t const availBefore = strm.avail_in;
ret = deflate(&strm, Z_NO_FLUSH);
AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
}
if (ret != Z_OK)
EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
{ size_t const cSize = writeJob->bufferSize - strm.avail_out;
if (cSize) {
writeJob->usedBufferSize = cSize;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += cSize;
strm.next_out = (Bytef*)writeJob->buffer;
strm.avail_out = (uInt)writeJob->bufferSize;
} }
if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYUPDATE_PROGRESS(
"\rRead : %u MB ==> %.2f%% ",
(unsigned)(inFileSize>>20),
(double)outFileSize/(double)inFileSize*100)
} else {
DISPLAYUPDATE_PROGRESS(
"\rRead : %u / %u MB ==> %.2f%% ",
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
(double)outFileSize/(double)inFileSize*100);
} }
while (1) {
int const ret = deflate(&strm, Z_FINISH);
{ size_t const cSize = writeJob->bufferSize - strm.avail_out;
if (cSize) {
writeJob->usedBufferSize = cSize;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += cSize;
strm.next_out = (Bytef*)writeJob->buffer;
strm.avail_out = (uInt)writeJob->bufferSize;
} }
if (ret == Z_STREAM_END) break;
if (ret != Z_BUF_ERROR)
EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
}
{ int const ret = deflateEnd(&strm);
if (ret != Z_OK) {
EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
} }
*readsize = inFileSize;
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return outFileSize;
}
#endif
#ifdef ZSTD_LZMACOMPRESS
static unsigned long long
FIO_compressLzmaFrame(cRess_t* ress,
const char* srcFileName, U64 const srcFileSize,
int compressionLevel, U64* readsize, int plain_lzma)
{
unsigned long long inFileSize = 0, outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret ret;
IOJob_t *writeJob = NULL;
if (compressionLevel < 0) compressionLevel = 0;
if (compressionLevel > 9) compressionLevel = 9;
if (plain_lzma) {
lzma_options_lzma opt_lzma;
if (lzma_lzma_preset(&opt_lzma, compressionLevel))
EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
if (ret != LZMA_OK)
EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
} else {
ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
if (ret != LZMA_OK)
EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
}
writeJob =AIO_WritePool_acquireJob(ress->writeCtx);
strm.next_out = (BYTE*)writeJob->buffer;
strm.avail_out = writeJob->bufferSize;
strm.next_in = 0;
strm.avail_in = 0;
while (1) {
if (strm.avail_in == 0) {
size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
inFileSize += inSize;
strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
strm.avail_in = ress->readCtx->srcBufferLoaded;
}
{
size_t const availBefore = strm.avail_in;
ret = lzma_code(&strm, action);
AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
}
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
{ size_t const compBytes = writeJob->bufferSize - strm.avail_out;
if (compBytes) {
writeJob->usedBufferSize = compBytes;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += compBytes;
strm.next_out = (BYTE*)writeJob->buffer;
strm.avail_out = writeJob->bufferSize;
} }
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20),
(double)outFileSize/(double)inFileSize*100)
else
DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
(double)outFileSize/(double)inFileSize*100);
if (ret == LZMA_STREAM_END) break;
}
lzma_end(&strm);
*readsize = inFileSize;
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return outFileSize;
}
#endif
#ifdef ZSTD_LZ4COMPRESS
#if LZ4_VERSION_NUMBER <= 10600
#define LZ4F_blockLinked blockLinked
#define LZ4F_max64KB max64KB
#endif
static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
static unsigned long long
FIO_compressLz4Frame(cRess_t* ress,
const char* srcFileName, U64 const srcFileSize,
int compressionLevel, int checksumFlag,
U64* readsize)
{
const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
unsigned long long inFileSize = 0, outFileSize = 0;
LZ4F_preferences_t prefs;
LZ4F_compressionContext_t ctx;
IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
if (LZ4F_isError(errorCode))
EXM_THROW(31, "zstd: failed to create lz4 compression context");
memset(&prefs, 0, sizeof(prefs));
assert(blockSize <= ress->readCtx->base.jobBufferSize);
/* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */
prefs.autoFlush = 0;
prefs.compressionLevel = compressionLevel;
prefs.frameInfo.blockMode = LZ4F_blockLinked;
prefs.frameInfo.blockSizeID = LZ4F_max64KB;
prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
#if LZ4_VERSION_NUMBER >= 10600
prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
#endif
assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize);
{
size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs);
if (LZ4F_isError(headerSize))
EXM_THROW(33, "File header generation failed : %s",
LZ4F_getErrorName(headerSize));
writeJob->usedBufferSize = headerSize;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += headerSize;
/* Read first block */
inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
/* Main Loop */
while (ress->readCtx->srcBufferLoaded) {
size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize,
ress->readCtx->srcBuffer, inSize, NULL);
if (LZ4F_isError(outSize))
EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
srcFileName, LZ4F_getErrorName(outSize));
outFileSize += outSize;
if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20),
(double)outFileSize/(double)inFileSize*100)
} else {
DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
(double)outFileSize/(double)inFileSize*100);
}
/* Write Block */
writeJob->usedBufferSize = outSize;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
/* Read next block */
AIO_ReadPool_consumeBytes(ress->readCtx, inSize);
inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
}
/* End of Stream mark */
headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL);
if (LZ4F_isError(headerSize))
EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
srcFileName, LZ4F_getErrorName(headerSize));
writeJob->usedBufferSize = headerSize;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += headerSize;
}
*readsize = inFileSize;
LZ4F_freeCompressionContext(ctx);
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return outFileSize;
}
#endif
static unsigned long long
FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
const cRess_t* ressPtr,
const char* srcFileName, U64 fileSize,
int compressionLevel, U64* readsize)
{
cRess_t const ress = *ressPtr;
IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx);
U64 compressedfilesize = 0;
ZSTD_EndDirective directive = ZSTD_e_continue;
U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
/* stats */
ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
typedef enum { noChange, slower, faster } speedChange_e;
speedChange_e speedChange = noChange;
unsigned flushWaiting = 0;
unsigned inputPresented = 0;
unsigned inputBlocked = 0;
unsigned lastJobID = 0;
UTIL_time_t lastAdaptTime = UTIL_getTime();
U64 const adaptEveryMicro = REFRESH_RATE;
UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize);
DISPLAYLEVEL(6, "compression using zstd format \n");
/* init */
if (fileSize != UTIL_FILESIZE_UNKNOWN) {
pledgedSrcSize = fileSize;
CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
} else if (prefs->streamSrcSize > 0) {
/* unknown source size; use the declared stream size */
pledgedSrcSize = prefs->streamSrcSize;
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
}
{
int windowLog;
UTIL_HumanReadableSize_t windowSize;
CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog));
if (windowLog == 0) {
if (prefs->ldmFlag) {
/* If long mode is set without a window size libzstd will set this size internally */
windowLog = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
} else {
const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0);
windowLog = (int)cParams.windowLog;
}
}
windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize)));
DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix);
}
(void)srcFileName;
/* Main compression loop */
do {
size_t stillToFlush;
/* Fill input Buffer */
size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize());
ZSTD_inBuffer inBuff = setInBuffer( ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 );
DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
*readsize += inSize;
if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize))
directive = ZSTD_e_end;
stillToFlush = 1;
while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
|| (directive == ZSTD_e_end && stillToFlush != 0) ) {
size_t const oldIPos = inBuff.pos;
ZSTD_outBuffer outBuff = setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos);
/* count stats */
inputPresented++;
if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
if (!toFlushNow) flushWaiting = 1;
/* Write compressed stream */
DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
(unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
if (outBuff.pos) {
writeJob->usedBufferSize = outBuff.pos;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
compressedfilesize += outBuff.pos;
}
/* adaptive mode : statistics measurement and speed correction */
if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) {
ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
lastAdaptTime = UTIL_getTime();
/* check output speed */
if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
assert(zfp.produced >= previous_zfp_update.produced);
assert(prefs->nbWorkers >= 1);
/* test if compression is blocked
* either because output is slow and all buffers are full
* or because input is slow and no job can start while waiting for at least one buffer to be filled.
* note : exclude starting part, since currentJobID > 1 */
if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
&& (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
) {
DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
speedChange = slower;
}
previous_zfp_update = zfp;
if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
&& (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
) {
DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
speedChange = slower;
}
flushWaiting = 0;
}
/* course correct only if there is at least one new job completed */
if (zfp.currentJobID > lastJobID) {
DISPLAYLEVEL(6, "compression level adaptation check \n")
/* check input speed */
if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
if (inputBlocked <= 0) {
DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
speedChange = slower;
} else if (speedChange == noChange) {
unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
previous_zfp_correction = zfp;
assert(inputPresented > 0);
DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
(unsigned)newlyIngested, (unsigned)newlyConsumed,
(unsigned)newlyFlushed, (unsigned)newlyProduced);
if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
&& (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
&& (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
) {
DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
speedChange = faster;
}
}
inputBlocked = 0;
inputPresented = 0;
}
if (speedChange == slower) {
DISPLAYLEVEL(6, "slower speed , higher compression \n")
compressionLevel ++;
if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
compressionLevel += (compressionLevel == 0); /* skip 0 */
ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
}
if (speedChange == faster) {
DISPLAYLEVEL(6, "faster speed , lighter compression \n")
compressionLevel --;
if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
compressionLevel -= (compressionLevel == 0); /* skip 0 */
ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
}
speedChange = noChange;
lastJobID = zfp.currentJobID;
} /* if (zfp.currentJobID > lastJobID) */
} /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */
/* display notification */
if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) {
ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed);
UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed);
UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced);
DELAY_NEXT_UPDATE();
/* display progress notifications */
DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */
if (g_display_prefs.displayLevel >= 3) {
/* Verbose progress update */
DISPLAY_PROGRESS(
"(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ",
compressionLevel,
buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix,
consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix,
produced_hrs.precision, produced_hrs.value, produced_hrs.suffix,
cShare );
} else {
/* Require level 2 or forcibly displayed progress counter for summarized updates */
if (fCtx->nbFilesTotal > 1) {
size_t srcFileNameSize = strlen(srcFileName);
/* Ensure that the string we print is roughly the same size each time */
if (srcFileNameSize > 18) {
const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
} else {
DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
}
}
DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix);
if (fileSize != UTIL_FILESIZE_UNKNOWN)
DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix);
DISPLAY_PROGRESS(" ==> %2.f%%", cShare);
}
} /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */
} /* while ((inBuff.pos != inBuff.size) */
} while (directive != ZSTD_e_end);
if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
(unsigned long long)*readsize, (unsigned long long)fileSize);
}
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx);
return compressedfilesize;
}
/*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
* @return : 0 : compression completed correctly,
* 1 : missing or pb opening srcFileName
*/
static int
FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
cRess_t ress,
const char* dstFileName, const char* srcFileName,
int compressionLevel)
{
UTIL_time_t const timeStart = UTIL_getTime();
clock_t const cpuStart = clock();
U64 readsize = 0;
U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName);
DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
/* compression format selection */
switch (prefs->compressionType) {
default:
case FIO_zstdCompression:
compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
break;
case FIO_gzipCompression:
#ifdef ZSTD_GZCOMPRESS
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
srcFileName);
#endif
break;
case FIO_xzCompression:
case FIO_lzmaCompression:
#ifdef ZSTD_LZMACOMPRESS
compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
srcFileName);
#endif
break;
case FIO_lz4Compression:
#ifdef ZSTD_LZ4COMPRESS
compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
srcFileName);
#endif
break;
}
/* Status */
fCtx->totalBytesInput += (size_t)readsize;
fCtx->totalBytesOutput += (size_t)compressedfilesize;
DISPLAY_PROGRESS("\r%79s\r", "");
if (FIO_shouldDisplayFileSummary(fCtx)) {
UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize);
UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize);
if (readsize == 0) {
DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n",
srcFileName,
hr_isize.precision, hr_isize.value, hr_isize.suffix,
hr_osize.precision, hr_osize.value, hr_osize.suffix,
dstFileName);
} else {
DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n",
srcFileName,
(double)compressedfilesize / (double)readsize * 100,
hr_isize.precision, hr_isize.value, hr_isize.suffix,
hr_osize.precision, hr_osize.value, hr_osize.suffix,
dstFileName);
}
}
/* Elapsed Time and CPU Load */
{ clock_t const cpuEnd = clock();
double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
double const timeLength_s = (double)timeLength_ns / 1000000000;
double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
srcFileName, timeLength_s, cpuLoad_pct);
}
return 0;
}
/*! FIO_compressFilename_dstFile() :
* open dstFileName, or pass-through if ress.file != NULL,
* then start compression with FIO_compressFilename_internal().
* Manages source removal (--rm) and file permissions transfer.
* note : ress.srcFile must be != NULL,
* so reach this function through FIO_compressFilename_srcFile().
* @return : 0 : compression completed correctly,
* 1 : pb
*/
static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
cRess_t ress,
const char* dstFileName,
const char* srcFileName,
const stat_t* srcFileStat,
int compressionLevel)
{
int closeDstFile = 0;
int result;
int transferStat = 0;
int dstFd = -1;
assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
if (AIO_WritePool_getFile(ress.writeCtx) == NULL) {
int dstFileInitialPermissions = DEFAULT_FILE_PERMISSIONS;
if ( strcmp (srcFileName, stdinmark)
&& strcmp (dstFileName, stdoutmark)
&& UTIL_isRegularFileStat(srcFileStat) ) {
transferStat = 1;
dstFileInitialPermissions = TEMPORARY_FILE_PERMISSIONS;
}
closeDstFile = 1;
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
{ FILE *dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
if (dstFile==NULL) return 1; /* could not open dstFileName */
dstFd = fileno(dstFile);
AIO_WritePool_setFile(ress.writeCtx, dstFile);
}
/* Must only be added after FIO_openDstFile() succeeds.
* Otherwise we may delete the destination file if it already exists,
* and the user presses Ctrl-C when asked if they wish to overwrite.
*/
addHandler(dstFileName);
}
result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
if (closeDstFile) {
clearHandler();
if (transferStat) {
UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
}
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */
DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
result=1;
}
if (transferStat) {
UTIL_utime(dstFileName, srcFileStat);
}
if ( (result != 0) /* operation failure */
&& strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
) {
FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
}
}
return result;
}
/* List used to compare file extensions (used with --exclude-compressed flag)
* Different from the suffixList and should only apply to ZSTD compress operationResult
*/
static const char *compressedFileExtensions[] = {
ZSTD_EXTENSION,
TZSTD_EXTENSION,
GZ_EXTENSION,
TGZ_EXTENSION,
LZMA_EXTENSION,
XZ_EXTENSION,
TXZ_EXTENSION,
LZ4_EXTENSION,
TLZ4_EXTENSION,
".7z",
".aa3",
".aac",
".aar",
".ace",
".alac",
".ape",
".apk",
".apng",
".arc",
".archive",
".arj",
".ark",
".asf",
".avi",
".avif",
".ba",
".br",
".bz2",
".cab",
".cdx",
".chm",
".cr2",
".divx",
".dmg",
".dng",
".docm",
".docx",
".dotm",
".dotx",
".dsft",
".ear",
".eftx",
".emz",
".eot",
".epub",
".f4v",
".flac",
".flv",
".gho",
".gif",
".gifv",
".gnp",
".iso",
".jar",
".jpeg",
".jpg",
".jxl",
".lz",
".lzh",
".m4a",
".m4v",
".mkv",
".mov",
".mp2",
".mp3",
".mp4",
".mpa",
".mpc",
".mpe",
".mpeg",
".mpg",
".mpl",
".mpv",
".msi",
".odp",
".ods",
".odt",
".ogg",
".ogv",
".otp",
".ots",
".ott",
".pea",
".png",
".pptx",
".qt",
".rar",
".s7z",
".sfx",
".sit",
".sitx",
".sqx",
".svgz",
".swf",
".tbz2",
".tib",
".tlz",
".vob",
".war",
".webm",
".webp",
".wma",
".wmv",
".woff",
".woff2",
".wvl",
".xlsx",
".xpi",
".xps",
".zip",
".zipx",
".zoo",
".zpaq",
NULL
};
/*! FIO_compressFilename_srcFile() :
* @return : 0 : compression completed correctly,
* 1 : missing or pb opening srcFileName
*/
static int
FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
cRess_t ress,
const char* dstFileName,
const char* srcFileName,
int compressionLevel)
{
int result;
FILE* srcFile;
stat_t srcFileStat;
U64 fileSize = UTIL_FILESIZE_UNKNOWN;
DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
if (strcmp(srcFileName, stdinmark)) {
if (UTIL_stat(srcFileName, &srcFileStat)) {
/* failure to stat at all is handled during opening */
/* ensure src is not a directory */
if (UTIL_isDirectoryStat(&srcFileStat)) {
DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
return 1;
}
/* ensure src is not the same as dict (if present) */
if (ress.dictFileName != NULL && UTIL_isSameFileStat(srcFileName, ress.dictFileName, &srcFileStat, &ress.dictFileStat)) {
DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
return 1;
}
}
}
/* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
* YES => ZSTD will skip compression of the file and will return 0.
* NO => ZSTD will resume with compress operation.
*/
if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
return 0;
}
srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
if (srcFile == NULL) return 1; /* srcFile could not be opened */
/* Don't use AsyncIO for small files */
if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
fileSize = UTIL_getFileSizeStat(&srcFileStat);
if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
AIO_ReadPool_setAsync(ress.readCtx, 0);
AIO_WritePool_setAsync(ress.writeCtx, 0);
} else {
AIO_ReadPool_setAsync(ress.readCtx, 1);
AIO_WritePool_setAsync(ress.writeCtx, 1);
}
AIO_ReadPool_setFile(ress.readCtx, srcFile);
result = FIO_compressFilename_dstFile(
fCtx, prefs, ress,
dstFileName, srcFileName,
&srcFileStat, compressionLevel);
AIO_ReadPool_closeFile(ress.readCtx);
if ( prefs->removeSrcFile /* --rm */
&& result == 0 /* success */
&& strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
) {
/* We must clear the handler, since after this point calling it would
* delete both the source and destination files.
*/
clearHandler();
if (FIO_removeFile(srcFileName))
EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
}
return result;
}
static const char*
checked_index(const char* options[], size_t length, size_t index) {
assert(index < length);
/* Necessary to avoid warnings since -O3 will omit the above `assert` */
(void) length;
return options[index];
}
#define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (size_t)(index))
void FIO_displayCompressionParameters(const FIO_prefs_t* prefs)
{
static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION,
LZMA_EXTENSION, LZ4_EXTENSION};
static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"};
static const char* checkSumOptions[3] = {" --no-check", "", " --check"};
static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"};
static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"};
assert(g_display_prefs.displayLevel >= 4);
DISPLAY("--format=%s", formatOptions[prefs->compressionType]);
DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport));
DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID");
DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag));
DISPLAY(" --block-size=%d", prefs->blockSize);
if (prefs->adaptiveMode)
DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel);
DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder));
DISPLAY("%s", prefs->rsyncable ? " --rsyncable" : "");
if (prefs->streamSrcSize)
DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize);
if (prefs->srcSizeHint)
DISPLAY(" --size-hint=%d", prefs->srcSizeHint);
if (prefs->targetCBlockSize)
DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize);
DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode));
DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB);
DISPLAY(" --threads=%d", prefs->nbWorkers);
DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : "");
DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-");
DISPLAY("\n");
}
#undef INDEX
int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
const char* srcFileName, const char* dictFileName,
int compressionLevel, ZSTD_compressionParameters comprParams)
{
cRess_t ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
#define DISPLAY_LEVEL_DEFAULT 2
FIO_freeCResources(&ress);
return result;
}
/* FIO_determineCompressedName() :
* create a destination filename for compressed srcFileName.
* @return a pointer to it.
* This function never returns an error (it may abort() in case of pb)
*/
static const char*
FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
char* outDirFilename = NULL;
size_t sfnSize = strlen(srcFileName);
size_t const srcSuffixLen = strlen(suffix);
if(!strcmp(srcFileName, stdinmark)) {
return stdoutmark;
}
if (outDirName) {
outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
sfnSize = strlen(outDirFilename);
assert(outDirFilename != NULL);
}
if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
/* resize buffer for dstName */
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + srcSuffixLen + 30;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (!dstFileNameBuffer) {
EXM_THROW(30, "zstd: %s", strerror(errno));
}
}
assert(dstFileNameBuffer != NULL);
if (outDirFilename) {
memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
free(outDirFilename);
} else {
memcpy(dstFileNameBuffer, srcFileName, sfnSize);
}
memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
return dstFileNameBuffer;
}
static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
size_t i;
unsigned long long fileSize, maxFileSize = 0;
for (i = 0; i < nbFiles; i++) {
fileSize = UTIL_getFileSize(inFileNames[i]);
maxFileSize = fileSize > maxFileSize ? fileSize : maxFileSize;
}
return maxFileSize;
}
/* FIO_compressMultipleFilenames() :
* compress nbFiles files
* into either one destination (outFileName),
* or into one file each (outFileName == NULL, but suffix != NULL),
* or into a destination folder (specified with -O)
*/
int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
const char** inFileNamesTable,
const char* outMirroredRootDirName,
const char* outDirName,
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
{
int status;
int error = 0;
cRess_t ress = FIO_createCResources(prefs, dictFileName,
FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
compressionLevel, comprParams);
/* init */
assert(outFileName != NULL || suffix != NULL);
if (outFileName != NULL) { /* output into a single destination (stdout typically) */
FILE *dstFile;
if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
FIO_freeCResources(&ress);
return 1;
}
dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
if (dstFile == NULL) { /* could not open outFileName */
error = 1;
} else {
AIO_WritePool_setFile(ress.writeCtx, dstFile);
for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
if (!status) fCtx->nbFilesProcessed++;
error |= status;
}
if (AIO_WritePool_closeFile(ress.writeCtx))
EXM_THROW(29, "Write error (%s) : cannot properly close %s",
strerror(errno), outFileName);
}
} else {
if (outMirroredRootDirName)
UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
const char* dstFileName = NULL;
if (outMirroredRootDirName) {
char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
if (validMirroredDirName) {
dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
free(validMirroredDirName);
} else {
DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
error=1;
continue;
}
} else {
dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
}
status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
if (!status) fCtx->nbFilesProcessed++;
error |= status;
}
if (outDirName)
FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
}
if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput);
UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput);
DISPLAY_PROGRESS("\r%79s\r", "");
if (fCtx->totalBytesInput == 0) {
DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n",
fCtx->nbFilesProcessed,
hr_isize.precision, hr_isize.value, hr_isize.suffix,
hr_osize.precision, hr_osize.value, hr_osize.suffix);
} else {
DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n",
fCtx->nbFilesProcessed,
(double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
hr_isize.precision, hr_isize.value, hr_isize.suffix,
hr_osize.precision, hr_osize.value, hr_osize.suffix);
}
}
FIO_freeCResources(&ress);
return error;
}
#endif /* #ifndef ZSTD_NOCOMPRESS */
#ifndef ZSTD_NODECOMPRESS
/* **************************************************************************
* Decompression
***************************************************************************/
typedef struct {
FIO_Dict_t dict;
ZSTD_DStream* dctx;
WritePoolCtx_t *writeCtx;
ReadPoolCtx_t *readCtx;
} dRess_t;
static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
{
int useMMap = prefs->mmapDict == ZSTD_ps_enable;
int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
stat_t statbuf;
dRess_t ress;
memset(&statbuf, 0, sizeof(statbuf));
memset(&ress, 0, sizeof(ress));
FIO_getDictFileStat(dictFileName, &statbuf);
if (prefs->patchFromMode){
U64 const dictSize = UTIL_getFileSizeStat(&statbuf);
useMMap |= dictSize > prefs->memLimit;
FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */);
}
/* Allocation */
ress.dctx = ZSTD_createDStream();
if (ress.dctx==NULL)
EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
/* dictionary */
{
FIO_dictBufferType_t dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
FIO_initDict(&ress.dict, dictFileName, prefs, &statbuf, dictBufferType);
CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
if (prefs->patchFromMode){
CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
} else {
CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
}
}
ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize());
ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize());
return ress;
}
static void FIO_freeDResources(dRess_t ress)
{
FIO_freeDict(&(ress.dict));
CHECK( ZSTD_freeDStream(ress.dctx) );
AIO_WritePool_free(ress.writeCtx);
AIO_ReadPool_free(ress.readCtx);
}
/* FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
* @return : 0 (no error) */
static int FIO_passThrough(dRess_t *ress)
{
size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize());
IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
while(ress->readCtx->srcBufferLoaded) {
size_t writeSize;
writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
assert(writeSize <= writeJob->bufferSize);
memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize);
writeJob->usedBufferSize = writeSize;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
AIO_ReadPool_consumeBytes(ress->readCtx, writeSize);
AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
}
assert(ress->readCtx->reachedEof);
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return 0;
}
/* FIO_zstdErrorHelp() :
* detailed error message when requested window size is too large */
static void
FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
const dRess_t* ress,
size_t err,
const char* srcFileName)
{
ZSTD_frameHeader header;
/* Help message only for one specific error */
if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
return;
/* Try to decode the frame header */
err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded);
if (err == 0) {
unsigned long long const windowSize = header.windowSize;
unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
assert(prefs->memLimit > 0);
DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
srcFileName, windowSize, prefs->memLimit);
if (windowLog <= ZSTD_WINDOWLOG_MAX) {
unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
srcFileName, windowLog, windowMB);
return;
} }
DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
srcFileName, ZSTD_WINDOWLOG_MAX);
}
/** FIO_decompressFrame() :
* @return : size of decoded zstd frame, or an error code
*/
#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
static unsigned long long
FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress,
const FIO_prefs_t* const prefs,
const char* srcFileName,
U64 alreadyDecoded) /* for multi-frames streams */
{
U64 frameSize = 0;
IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
/* display last 20 characters only when not --verbose */
{ size_t const srcFileLength = strlen(srcFileName);
if ((srcFileLength>20) && (g_display_prefs.displayLevel<3))
srcFileName += srcFileLength-20;
}
ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
/* Header loading : ensures ZSTD_getFrameHeader() will succeed */
AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX);
/* Main decompression Loop */
while (1) {
ZSTD_inBuffer inBuff = setInBuffer( ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 );
ZSTD_outBuffer outBuff= setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
if (ZSTD_isError(readSizeHint)) {
DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
srcFileName, ZSTD_getErrorName(readSizeHint));
FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
AIO_WritePool_releaseIoJob(writeJob);
return FIO_ERROR_FRAME_DECODING;
}
/* Write block */
writeJob->usedBufferSize = outBuff.pos;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
frameSize += outBuff.pos;
if (fCtx->nbFilesTotal > 1) {
size_t srcFileNameSize = strlen(srcFileName);
if (srcFileNameSize > 18) {
const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
DISPLAYUPDATE_PROGRESS(
"\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
} else {
DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix);
}
} else {
DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... ",
srcFileName, hrs.precision, hrs.value, hrs.suffix);
}
AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos);
if (readSizeHint == 0) break; /* end of frame */
/* Fill input buffer */
{ size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize()); /* support large skippable frames */
if (ress->readCtx->srcBufferLoaded < toDecode) {
size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode);
if (readSize==0) {
DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
srcFileName);
AIO_WritePool_releaseIoJob(writeJob);
return FIO_ERROR_FRAME_DECODING;
}
} } }
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return frameSize;
}
#ifdef ZSTD_GZDECOMPRESS
static unsigned long long
FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName)
{
unsigned long long outFileSize = 0;
z_stream strm;
int flush = Z_NO_FLUSH;
int decodingError = 0;
IOJob_t *writeJob = NULL;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.next_in = 0;
strm.avail_in = 0;
/* see https://www.zlib.net/manual.html */
if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
return FIO_ERROR_FRAME_DECODING;
writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
strm.next_out = (Bytef*)writeJob->buffer;
strm.avail_out = (uInt)writeJob->bufferSize;
strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
for ( ; ; ) {
int ret;
if (strm.avail_in == 0) {
AIO_ReadPool_consumeAndRefill(ress->readCtx);
if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH;
strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
}
ret = inflate(&strm, flush);
if (ret == Z_BUF_ERROR) {
DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
decodingError = 1; break;
}
if (ret != Z_OK && ret != Z_STREAM_END) {
DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
decodingError = 1; break;
}
{ size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
if (decompBytes) {
writeJob->usedBufferSize = decompBytes;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += decompBytes;
strm.next_out = (Bytef*)writeJob->buffer;
strm.avail_out = (uInt)writeJob->bufferSize;
}
}
if (ret == Z_STREAM_END) break;
}
AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */
&& (decodingError==0) ) {
DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
decodingError = 1;
}
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
#ifdef ZSTD_LZMADECOMPRESS
static unsigned long long
FIO_decompressLzmaFrame(dRess_t* ress,
const char* srcFileName, int plain_lzma)
{
unsigned long long outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret initRet;
int decodingError = 0;
IOJob_t *writeJob = NULL;
strm.next_in = 0;
strm.avail_in = 0;
if (plain_lzma) {
initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
} else {
initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
}
if (initRet != LZMA_OK) {
DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
srcFileName, initRet);
return FIO_ERROR_FRAME_DECODING;
}
writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
strm.next_out = (BYTE*)writeJob->buffer;
strm.avail_out = writeJob->bufferSize;
strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
strm.avail_in = ress->readCtx->srcBufferLoaded;
for ( ; ; ) {
lzma_ret ret;
if (strm.avail_in == 0) {
AIO_ReadPool_consumeAndRefill(ress->readCtx);
if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
strm.avail_in = ress->readCtx->srcBufferLoaded;
}
ret = lzma_code(&strm, action);
if (ret == LZMA_BUF_ERROR) {
DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
decodingError = 1; break;
}
if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
srcFileName, ret);
decodingError = 1; break;
}
{ size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
if (decompBytes) {
writeJob->usedBufferSize = decompBytes;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += decompBytes;
strm.next_out = (BYTE*)writeJob->buffer;
strm.avail_out = writeJob->bufferSize;
} }
if (ret == LZMA_STREAM_END) break;
}
AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
lzma_end(&strm);
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
#ifdef ZSTD_LZ4DECOMPRESS
static unsigned long long
FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName)
{
unsigned long long filesize = 0;
LZ4F_errorCode_t nextToLoad = 4;
LZ4F_decompressionContext_t dCtx;
LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
int decodingError = 0;
IOJob_t *writeJob = NULL;
if (LZ4F_isError(errorCode)) {
DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
return FIO_ERROR_FRAME_DECODING;
}
writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
/* Main Loop */
for (;nextToLoad;) {
size_t pos = 0;
size_t decodedBytes = writeJob->bufferSize;
int fullBufferDecoded = 0;
/* Read input */
AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad);
if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */
while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */
/* Decode Input (at least partially) */
size_t remaining = ress->readCtx->srcBufferLoaded - pos;
decodedBytes = writeJob->bufferSize;
nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos,
&remaining, NULL);
if (LZ4F_isError(nextToLoad)) {
DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
srcFileName, LZ4F_getErrorName(nextToLoad));
decodingError = 1; nextToLoad = 0; break;
}
pos += remaining;
assert(pos <= ress->readCtx->srcBufferLoaded);
fullBufferDecoded = decodedBytes == writeJob->bufferSize;
/* Write Block */
if (decodedBytes) {
UTIL_HumanReadableSize_t hrs;
writeJob->usedBufferSize = decodedBytes;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
filesize += decodedBytes;
hrs = UTIL_makeHumanReadableSize(filesize);
DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix);
}
if (!nextToLoad) break;
}
AIO_ReadPool_consumeBytes(ress->readCtx, pos);
}
if (nextToLoad!=0) {
DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
decodingError=1;
}
LZ4F_freeDecompressionContext(dCtx);
AIO_WritePool_releaseIoJob(writeJob);
AIO_WritePool_sparseWriteEnd(ress->writeCtx);
return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
}
#endif
/** FIO_decompressFrames() :
* Find and decode frames inside srcFile
* srcFile presumed opened and valid
* @return : 0 : OK
* 1 : error
*/
static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
dRess_t ress, const FIO_prefs_t* const prefs,
const char* dstFileName, const char* srcFileName)
{
unsigned readSomething = 0;
unsigned long long filesize = 0;
int passThrough = prefs->passThrough;
if (passThrough == -1) {
/* If pass-through mode is not explicitly enabled or disabled,
* default to the legacy behavior of enabling it if we are writing
* to stdout with the overwrite flag enabled.
*/
passThrough = prefs->overwrite && !strcmp(dstFileName, stdoutmark);
}
assert(passThrough == 0 || passThrough == 1);
/* for each frame */
for ( ; ; ) {
/* check magic number -> version */
size_t const toRead = 4;
const BYTE* buf;
AIO_ReadPool_fillBuffer(ress.readCtx, toRead);
buf = (const BYTE*)ress.readCtx->srcBuffer;
if (ress.readCtx->srcBufferLoaded==0) {
if (readSomething==0) { /* srcFile is empty (which is invalid) */
DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
return 1;
} /* else, just reached frame boundary */
break; /* no more input */
}
readSomething = 1; /* there is at least 1 byte in srcFile */
if (ress.readCtx->srcBufferLoaded < toRead) { /* not enough input to check magic number */
if (passThrough) {
return FIO_passThrough(&ress);
}
DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
return 1;
}
if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) {
unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
} else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
#ifdef ZSTD_GZDECOMPRESS
unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
return 1;
#endif
} else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
|| (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
#ifdef ZSTD_LZMADECOMPRESS
unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
return 1;
#endif
} else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
#ifdef ZSTD_LZ4DECOMPRESS
unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
return 1;
#endif
} else if (passThrough) {
return FIO_passThrough(&ress);
} else {
DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
return 1;
} } /* for each frame */
/* Final Status */
fCtx->totalBytesOutput += (size_t)filesize;
DISPLAY_PROGRESS("\r%79s\r", "");
if (FIO_shouldDisplayFileSummary(fCtx))
DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize);
return 0;
}
/** FIO_decompressDstFile() :
open `dstFileName`, or pass-through if writeCtx's file is already != 0,
then start decompression process (FIO_decompressFrames()).
@return : 0 : OK
1 : operation aborted
*/
static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
dRess_t ress,
const char* dstFileName,
const char* srcFileName,
const stat_t* srcFileStat)
{
int result;
int releaseDstFile = 0;
int transferStat = 0;
int dstFd = 0;
if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) {
FILE *dstFile;
int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
&& strcmp(dstFileName, stdoutmark)
&& UTIL_isRegularFileStat(srcFileStat) ) {
transferStat = 1;
dstFilePermissions = TEMPORARY_FILE_PERMISSIONS;
}
releaseDstFile = 1;
dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
if (dstFile==NULL) return 1;
dstFd = fileno(dstFile);
AIO_WritePool_setFile(ress.writeCtx, dstFile);
/* Must only be added after FIO_openDstFile() succeeds.
* Otherwise we may delete the destination file if it already exists,
* and the user presses Ctrl-C when asked if they wish to overwrite.
*/
addHandler(dstFileName);
}
result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName);
if (releaseDstFile) {
clearHandler();
if (transferStat) {
UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
}
if (AIO_WritePool_closeFile(ress.writeCtx)) {
DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
result = 1;
}
if (transferStat) {
UTIL_utime(dstFileName, srcFileStat);
}
if ( (result != 0) /* operation failure */
&& strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
) {
FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
}
}
return result;
}
/** FIO_decompressSrcFile() :
Open `srcFileName`, transfer control to decompressDstFile()
@return : 0 : OK
1 : error
*/
static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
{
FILE* srcFile;
stat_t srcFileStat;
int result;
U64 fileSize = UTIL_FILESIZE_UNKNOWN;
if (UTIL_isDirectory(srcFileName)) {
DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
return 1;
}
srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
if (srcFile==NULL) return 1;
/* Don't use AsyncIO for small files */
if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
fileSize = UTIL_getFileSizeStat(&srcFileStat);
if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
AIO_ReadPool_setAsync(ress.readCtx, 0);
AIO_WritePool_setAsync(ress.writeCtx, 0);
} else {
AIO_ReadPool_setAsync(ress.readCtx, 1);
AIO_WritePool_setAsync(ress.writeCtx, 1);
}
AIO_ReadPool_setFile(ress.readCtx, srcFile);
result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName, &srcFileStat);
AIO_ReadPool_setFile(ress.readCtx, NULL);
/* Close file */
if (fclose(srcFile)) {
DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
return 1;
}
if ( prefs->removeSrcFile /* --rm */
&& (result==0) /* decompression successful */
&& strcmp(srcFileName, stdinmark) ) /* not stdin */ {
/* We must clear the handler, since after this point calling it would
* delete both the source and destination files.
*/
clearHandler();
if (FIO_removeFile(srcFileName)) {
/* failed to remove src file */
DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
return 1;
} }
return result;
}
int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
const char* dstFileName, const char* srcFileName,
const char* dictFileName)
{
dRess_t const ress = FIO_createDResources(prefs, dictFileName);
int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
FIO_freeDResources(ress);
return decodingError;
}
static const char *suffixList[] = {
ZSTD_EXTENSION,
TZSTD_EXTENSION,
#ifndef ZSTD_NODECOMPRESS
ZSTD_ALT_EXTENSION,
#endif
#ifdef ZSTD_GZDECOMPRESS
GZ_EXTENSION,
TGZ_EXTENSION,
#endif
#ifdef ZSTD_LZMADECOMPRESS
LZMA_EXTENSION,
XZ_EXTENSION,
TXZ_EXTENSION,
#endif
#ifdef ZSTD_LZ4DECOMPRESS
LZ4_EXTENSION,
TLZ4_EXTENSION,
#endif
NULL
};
static const char *suffixListStr =
ZSTD_EXTENSION "/" TZSTD_EXTENSION
#ifdef ZSTD_GZDECOMPRESS
"/" GZ_EXTENSION "/" TGZ_EXTENSION
#endif
#ifdef ZSTD_LZMADECOMPRESS
"/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
#endif
#ifdef ZSTD_LZ4DECOMPRESS
"/" LZ4_EXTENSION "/" TLZ4_EXTENSION
#endif
;
/* FIO_determineDstName() :
* create a destination filename from a srcFileName.
* @return a pointer to it.
* @return == NULL if there is an error */
static const char*
FIO_determineDstName(const char* srcFileName, const char* outDirName)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
size_t dstFileNameEndPos;
char* outDirFilename = NULL;
const char* dstSuffix = "";
size_t dstSuffixLen = 0;
size_t sfnSize = strlen(srcFileName);
size_t srcSuffixLen;
const char* const srcSuffix = strrchr(srcFileName, '.');
if(!strcmp(srcFileName, stdinmark)) {
return stdoutmark;
}
if (srcSuffix == NULL) {
DISPLAYLEVEL(1,
"zstd: %s: unknown suffix (%s expected). "
"Can't derive the output file name. "
"Specify it with -o dstFileName. Ignoring.\n",
srcFileName, suffixListStr);
return NULL;
}
srcSuffixLen = strlen(srcSuffix);
{
const char** matchedSuffixPtr;
for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
break;
}
}
/* check suffix is authorized */
if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
DISPLAYLEVEL(1,
"zstd: %s: unknown suffix (%s expected). "
"Can't derive the output file name. "
"Specify it with -o dstFileName. Ignoring.\n",
srcFileName, suffixListStr);
return NULL;
}
if ((*matchedSuffixPtr)[1] == 't') {
dstSuffix = ".tar";
dstSuffixLen = strlen(dstSuffix);
}
}
if (outDirName) {
outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
sfnSize = strlen(outDirFilename);
assert(outDirFilename != NULL);
}
if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
/* allocate enough space to write dstFilename into it */
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + 20;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (dstFileNameBuffer==NULL)
EXM_THROW(74, "%s : not enough memory for dstFileName",
strerror(errno));
}
/* return dst name == src name truncated from suffix */
assert(dstFileNameBuffer != NULL);
dstFileNameEndPos = sfnSize - srcSuffixLen;
if (outDirFilename) {
memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
free(outDirFilename);
} else {
memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
}
/* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
* extension on decompression. Also writes terminating null. */
strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
return dstFileNameBuffer;
/* note : dstFileNameBuffer memory is not going to be free */
}
int
FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
const char** srcNamesTable,
const char* outMirroredRootDirName,
const char* outDirName, const char* outFileName,
const char* dictFileName)
{
int status;
int error = 0;
dRess_t ress = FIO_createDResources(prefs, dictFileName);
if (outFileName) {
if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
FIO_freeDResources(ress);
return 1;
}
if (!prefs->testMode) {
FILE* dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
if (dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
AIO_WritePool_setFile(ress.writeCtx, dstFile);
}
for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
if (!status) fCtx->nbFilesProcessed++;
error |= status;
}
if ((!prefs->testMode) && (AIO_WritePool_closeFile(ress.writeCtx)))
EXM_THROW(72, "Write error : %s : cannot properly close output file",
strerror(errno));
} else {
if (outMirroredRootDirName)
UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */
const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
const char* dstFileName = NULL;
if (outMirroredRootDirName) {
char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
if (validMirroredDirName) {
dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
free(validMirroredDirName);
} else {
DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
}
} else {
dstFileName = FIO_determineDstName(srcFileName, outDirName);
}
if (dstFileName == NULL) { error=1; continue; }
status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
if (!status) fCtx->nbFilesProcessed++;
error |= status;
}
if (outDirName)
FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
}
if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
DISPLAY_PROGRESS("\r%79s\r", "");
DISPLAY_SUMMARY("%d files decompressed : %6llu bytes total \n",
fCtx->nbFilesProcessed, (unsigned long long)fCtx->totalBytesOutput);
}
FIO_freeDResources(ress);
return error;
}
/* **************************************************************************
* .zst file info (--list command)
***************************************************************************/
typedef struct {
U64 decompressedSize;
U64 compressedSize;
U64 windowSize;
int numActualFrames;
int numSkippableFrames;
int decompUnavailable;
int usesCheck;
BYTE checksum[4];
U32 nbFiles;
unsigned dictID;
} fileInfo_t;
typedef enum {
info_success=0,
info_frame_error=1,
info_not_zstd=2,
info_file_error=3,
info_truncated_input=4
} InfoError;
#define ERROR_IF(c,n,...) { \
if (c) { \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, " \n"); \
return n; \
} \
}
static InfoError
FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
{
/* begin analyzing frame */
for ( ; ; ) {
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
if ( feof(srcFile)
&& (numBytesRead == 0)
&& (info->compressedSize > 0)
&& (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
unsigned long long file_size = (unsigned long long) info->compressedSize;
ERROR_IF(file_position != file_size, info_truncated_input,
"Error: seeked to position %llu, which is beyond file size of %llu\n",
file_position,
file_size);
break; /* correct end of file => success */
}
ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
}
{ U32 const magicNumber = MEM_readLE32(headerBuffer);
/* Zstandard frame */
if (magicNumber == ZSTD_MAGICNUMBER) {
ZSTD_frameHeader header;
U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
|| frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
info->decompUnavailable = 1;
} else {
info->decompressedSize += frameContentSize;
}
ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
info_frame_error, "Error: could not decode frame header");
if (info->dictID != 0 && info->dictID != header.dictID) {
DISPLAY("WARNING: File contains multiple frames with different dictionary IDs. Showing dictID 0 instead");
info->dictID = 0;
} else {
info->dictID = header.dictID;
}
info->windowSize = header.windowSize;
/* move to the end of the frame header */
{ size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
info_frame_error, "Error: could not move to end of frame header");
}
/* skip all blocks in the frame */
{ int lastBlock = 0;
do {
BYTE blockHeaderBuffer[3];
ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
info_frame_error, "Error while reading block header");
{ U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
U32 const blockTypeID = (blockHeader >> 1) & 3;
U32 const isRLE = (blockTypeID == 1);
U32 const isWrongBlock = (blockTypeID == 3);
long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
lastBlock = blockHeader & 1;
ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
info_frame_error, "Error: could not skip to end of block");
}
} while (lastBlock != 1);
}
/* check if checksum is used */
{ BYTE const frameHeaderDescriptor = headerBuffer[4];
int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
if (contentChecksumFlag) {
info->usesCheck = 1;
ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4,
info_frame_error, "Error: could not read checksum");
} }
info->numActualFrames++;
}
/* Skippable frame */
else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
U32 const frameSize = MEM_readLE32(headerBuffer + 4);
long const seek = (long)(8 + frameSize - numBytesRead);
ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
info_frame_error, "Error: could not find end of skippable frame");
info->numSkippableFrames++;
}
/* unknown content */
else {
return info_not_zstd;
}
} /* magic number analysis */
} /* end analyzing frames */
return info_success;
}
static InfoError
getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
{
InfoError status;
stat_t srcFileStat;
FILE* const srcFile = FIO_openSrcFile(NULL, inFileName, &srcFileStat);
ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
info->compressedSize = UTIL_getFileSizeStat(&srcFileStat);
status = FIO_analyzeFrames(info, srcFile);
fclose(srcFile);
info->nbFiles = 1;
return status;
}
/** getFileInfo() :
* Reads information from file, stores in *info
* @return : InfoError status
*/
static InfoError
getFileInfo(fileInfo_t* info, const char* srcFileName)
{
ERROR_IF(!UTIL_isRegularFile(srcFileName),
info_file_error, "Error : %s is not a file", srcFileName);
return getFileInfo_fileConfirmed(info, srcFileName);
}
static void
displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
{
UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize);
UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize);
UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize);
double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
const char* const checkString = (info->usesCheck ? "XXH64" : "None");
if (displayLevel <= 2) {
if (!info->decompUnavailable) {
DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n",
info->numSkippableFrames + info->numActualFrames,
info->numSkippableFrames,
compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
ratio, checkString, inFileName);
} else {
DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n",
info->numSkippableFrames + info->numActualFrames,
info->numSkippableFrames,
compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
checkString, inFileName);
}
} else {
DISPLAYOUT("%s \n", inFileName);
DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
if (info->numSkippableFrames)
DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
DISPLAYOUT("DictID: %u\n", info->dictID);
DISPLAYOUT("Window Size: %.*f%s (%llu B)\n",
window_hrs.precision, window_hrs.value, window_hrs.suffix,
(unsigned long long)info->windowSize);
DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n",
compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
(unsigned long long)info->compressedSize);
if (!info->decompUnavailable) {
DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n",
decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
(unsigned long long)info->decompressedSize);
DISPLAYOUT("Ratio: %.4f\n", ratio);
}
if (info->usesCheck && info->numActualFrames == 1) {
DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString,
info->checksum[3], info->checksum[2],
info->checksum[1], info->checksum[0]
);
} else {
DISPLAYOUT("Check: %s\n", checkString);
}
DISPLAYOUT("\n");
}
}
static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
{
fileInfo_t total;
memset(&total, 0, sizeof(total));
total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
total.compressedSize = fi1.compressedSize + fi2.compressedSize;
total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
total.usesCheck = fi1.usesCheck & fi2.usesCheck;
total.nbFiles = fi1.nbFiles + fi2.nbFiles;
return total;
}
static int
FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
{
fileInfo_t info;
memset(&info, 0, sizeof(info));
{ InfoError const error = getFileInfo(&info, inFileName);
switch (error) {
case info_frame_error:
/* display error, but provide output */
DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
break;
case info_not_zstd:
DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
if (displayLevel > 2) DISPLAYOUT("\n");
return 1;
case info_file_error:
/* error occurred while opening the file */
if (displayLevel > 2) DISPLAYOUT("\n");
return 1;
case info_truncated_input:
DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
if (displayLevel > 2) DISPLAYOUT("\n");
return 1;
case info_success:
default:
break;
}
displayInfo(inFileName, &info, displayLevel);
*total = FIO_addFInfo(*total, info);
assert(error == info_success || error == info_frame_error);
return (int)error;
}
}
int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
{
/* ensure no specified input is stdin (needs fseek() capability) */
{ unsigned u;
for (u=0; u<numFiles;u++) {
ERROR_IF(!strcmp (filenameTable[u], stdinmark),
1, "zstd: --list does not support reading from standard input");
} }
if (numFiles == 0) {
if (!UTIL_isConsole(stdin)) {
DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
}
DISPLAYLEVEL(1, "No files given \n");
return 1;
}
if (displayLevel <= 2) {
DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
}
{ int error = 0;
fileInfo_t total;
memset(&total, 0, sizeof(total));
total.usesCheck = 1;
/* --list each file, and check for any error */
{ unsigned u;
for (u=0; u<numFiles;u++) {
error |= FIO_listFile(&total, filenameTable[u], displayLevel);
} }
if (numFiles > 1 && displayLevel <= 2) { /* display total */
UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize);
UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize);
double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
const char* const checkString = (total.usesCheck ? "XXH64" : "");
DISPLAYOUT("----------------------------------------------------------------- \n");
if (total.decompUnavailable) {
DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n",
total.numSkippableFrames + total.numActualFrames,
total.numSkippableFrames,
compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
checkString, (unsigned)total.nbFiles);
} else {
DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n",
total.numSkippableFrames + total.numActualFrames,
total.numSkippableFrames,
compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
ratio, checkString, (unsigned)total.nbFiles);
} }
return error;
}
}
#endif /* #ifndef ZSTD_NODECOMPRESS */