mirror of
https://github.com/facebook/zstd.git
synced 2024-11-23 20:16:44 +08:00
Merge pull request #1855 from shashank0791/shtavi-filter-precompressed-files
Added "--exclude-compressed" flag feature
This commit is contained in:
commit
4ef97aaea7
@ -319,6 +319,8 @@ struct FIO_prefs_s {
|
||||
/* Computation resources preferences */
|
||||
unsigned memLimit;
|
||||
int nbWorkers;
|
||||
|
||||
int excludeCompressedFiles;
|
||||
};
|
||||
|
||||
|
||||
@ -359,6 +361,7 @@ FIO_prefs_t* FIO_createPreferences(void)
|
||||
ret->srcSizeHint = 0;
|
||||
ret->testMode = 0;
|
||||
ret->literalCompressionMode = ZSTD_lcm_auto;
|
||||
ret->excludeCompressedFiles = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -402,6 +405,8 @@ void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
|
||||
prefs->nbWorkers = nbWorkers;
|
||||
}
|
||||
|
||||
/* FIO_setExcludeCompressedFile() :
 * Records `excludeCompressedFiles` in `prefs`.
 * FIO_compressFilename_srcFile() skips already-compressed sources
 * only when the stored value is exactly 1. */
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles)
{
    prefs->excludeCompressedFiles = excludeCompressedFiles;
}
|
||||
|
||||
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
|
||||
if (blockSize && prefs->nbWorkers==0)
|
||||
DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
|
||||
@ -1425,6 +1430,21 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
|
||||
return result;
|
||||
}
|
||||
|
||||
/* List used to compare file extensions (used with --exclude-compressed flag)
|
||||
* Different from the suffixList and should only apply to ZSTD compress operationResult
|
||||
*/
|
||||
static const char *compressedFileExtensions[] = {
|
||||
ZSTD_EXTENSION,
|
||||
TZSTD_EXTENSION,
|
||||
GZ_EXTENSION,
|
||||
TGZ_EXTENSION,
|
||||
LZMA_EXTENSION,
|
||||
XZ_EXTENSION,
|
||||
TXZ_EXTENSION,
|
||||
LZ4_EXTENSION,
|
||||
TLZ4_EXTENSION,
|
||||
NULL
|
||||
};
|
||||
|
||||
/*! FIO_compressFilename_srcFile() :
|
||||
* @return : 0 : compression completed correctly,
|
||||
@ -1451,6 +1471,15 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
|
||||
* YES => ZSTD will skip compression of the file and will return 0.
|
||||
* NO => ZSTD will resume with compress operation.
|
||||
*/
|
||||
if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
|
||||
DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ress.srcFile = FIO_openSrcFile(srcFileName);
|
||||
if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */
|
||||
|
||||
|
@ -93,6 +93,7 @@ void FIO_setLiteralCompressionMode(
|
||||
|
||||
void FIO_setNoProgress(unsigned noProgress);
|
||||
void FIO_setNotificationLevel(int level);
|
||||
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
|
||||
|
||||
/*-*************************************
|
||||
* Single File functions
|
||||
|
@ -326,6 +326,27 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
|
||||
|
||||
#endif /* #ifdef _WIN32 */
|
||||
|
||||
/* UTIL_isCompressedFile() :
 * Compares the extension of `inputName` (as reported by
 * UTIL_getFileExtension()) with each entry of `extensionList`.
 * `extensionList` is walked until its first NULL entry.
 * @return : 1 if the extension is exactly equal to one entry,
 *           0 otherwise.
 */
int UTIL_isCompressedFile(const char *inputName, const char *extensionList[])
{
    const char* const fileExt = UTIL_getFileExtension(inputName);
    const char** candidate;
    for (candidate = extensionList; *candidate != NULL; ++candidate) {
        if (!strcmp(fileExt, *candidate)) return 1;
    }
    return 0;
}
|
||||
|
||||
/* UTIL_getFileExtension() :
 * Returns the extension of the last path component of `infilename`.
 * @return : pointer (inside `infilename`) to the last '.' of the file name,
 *           so the result includes the dot (e.g. ".zst");
 *           "" when the file name has no '.', or when its only '.' is its
 *           first character (e.g. ".hidden", "dir/.hidden").
 * Only the part after the last directory separator is searched, so a '.'
 * in a directory name ("dir.v1/file") is never reported as an extension.
 * `infilename` must be a valid NUL-terminated string.
 */
const char* UTIL_getFileExtension(const char* infilename)
{
    const char* baseName = infilename;
    const char* extension;
    const char* const lastSlash = strrchr(infilename, '/');
    if (lastSlash != NULL) baseName = lastSlash + 1;
#ifdef _WIN32
    /* '\\' is also a directory separator on Windows */
    {   const char* const lastBackslash = strrchr(baseName, '\\');
        if (lastBackslash != NULL) baseName = lastBackslash + 1;
    }
#endif
    extension = strrchr(baseName, '.');
    if (!extension || extension==baseName) return "";
    return extension;
}
|
||||
|
||||
/*
|
||||
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
|
||||
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
|
||||
|
@ -40,7 +40,6 @@ extern "C" {
|
||||
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
|
||||
#include "mem.h" /* U32, U64 */
|
||||
|
||||
|
||||
/*-************************************************************
|
||||
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
|
||||
***************************************************************/
|
||||
@ -135,6 +134,8 @@ U32 UTIL_isDirectory(const char* infilename);
|
||||
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
|
||||
int UTIL_isSameFile(const char* file1, const char* file2);
|
||||
int UTIL_compareStr(const void *p1, const void *p2);
|
||||
int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
|
||||
const char* UTIL_getFileExtension(const char* infilename);
|
||||
|
||||
U32 UTIL_isFIFO(const char* infilename);
|
||||
U32 UTIL_isLink(const char* infilename);
|
||||
|
@ -136,6 +136,7 @@ static int usage_advanced(const char* programName)
|
||||
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
|
||||
DISPLAY( " -c : force write to standard output, even if it is the console\n");
|
||||
DISPLAY( " -l : print information about zstd compressed files \n");
|
||||
DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n");
|
||||
#ifndef ZSTD_NOCOMPRESS
|
||||
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
|
||||
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
|
||||
@ -708,7 +709,7 @@ int main(int argCount, const char* argv[])
|
||||
if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
|
||||
if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
|
||||
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
|
||||
|
||||
if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
|
||||
/* long commands with arguments */
|
||||
#ifndef ZSTD_NODICT
|
||||
if (longCommandWArg(&argument, "--train-cover")) {
|
||||
|
@ -215,6 +215,37 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t
|
||||
$ZSTD -b --fast=1 -i0e1 tmp --compress-literals
|
||||
$ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals
|
||||
|
||||
println "test: --exclude-compressed flag"
rm -rf precompressedFilterTestDir
mkdir -p precompressedFilterTestDir
./datagen $size > precompressedFilterTestDir/input.5
./datagen $size > precompressedFilterTestDir/input.6
# First pass : input.5 and input.6 become input.5.zst and input.6.zst
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
# Let some time pass so files created by the second pass get a later timestamp
sleep 5
./datagen $size > precompressedFilterTestDir/input.7
./datagen $size > precompressedFilterTestDir/input.8
# Second pass : only the new files should be compressed
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
test ! -f precompressedFilterTestDir/input.5.zst.zst
test ! -f precompressedFilterTestDir/input.6.zst.zst
file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s`
file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s`
# Use the portable `[` test (not the bash-only `[[`), with quoted operands
# NOTE(review): the failure branch only prints a message and does not abort - confirm whether it should exit non-zero
if [ "$file2timestamp" -ge "$file1timestamp" ]; then
  println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again."
else
  println "Test is not successful"
fi
# File extension check : ".zstbar" is not a known compressed extension
./datagen $size > precompressedFilterTestDir/input.zstbar
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
# ZSTD should compress input.zstbar
test -f precompressedFilterTestDir/input.zstbar.zst
# Check without the --exclude-compressed flag
$ZSTD --long --rm -r precompressedFilterTestDir
# Files should get compressed again without the --exclude-compressed flag.
test -f precompressedFilterTestDir/input.5.zst.zst
test -f precompressedFilterTestDir/input.6.zst.zst
println "Test completed"
|
||||
|
||||
println "test : file removal"
|
||||
$ZSTD -f --rm tmp
|
||||
test ! -f tmp # tmp should no longer be present
|
||||
|
Loading…
Reference in New Issue
Block a user