Merge pull request #1855 from shashank0791/shtavi-filter-precompressed-files

Added "--exclude-compressed" flag feature
This commit is contained in:
Yann Collet 2019-10-30 12:14:41 -07:00 committed by GitHub
commit 4ef97aaea7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 86 additions and 2 deletions

View File

@ -319,6 +319,8 @@ struct FIO_prefs_s {
/* Computation resources preferences */
unsigned memLimit;
int nbWorkers;
int excludeCompressedFiles;
};
@ -359,6 +361,7 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->srcSizeHint = 0;
ret->testMode = 0;
ret->literalCompressionMode = ZSTD_lcm_auto;
ret->excludeCompressedFiles = 0;
return ret;
}
@ -402,6 +405,8 @@ void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
prefs->nbWorkers = nbWorkers;
}
/* FIO_setExcludeCompressedFile() :
 * Stores the --exclude-compressed setting into `prefs`.
 * The compression path skips an already-compressed source only when
 * the stored value is exactly 1.
 */
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles)
{
    prefs->excludeCompressedFiles = excludeCompressedFiles;
}
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
if (blockSize && prefs->nbWorkers==0)
DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
@ -1425,6 +1430,21 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
return result;
}
/* Extensions used to recognize a source file as already compressed
 * (only consulted when the --exclude-compressed flag is set).
 * This table is distinct from suffixList and is intended for the
 * compression path only. The last entry is a NULL terminator.
 */
static const char *compressedFileExtensions[] = {
    ZSTD_EXTENSION, TZSTD_EXTENSION,
    GZ_EXTENSION,   TGZ_EXTENSION,
    LZMA_EXTENSION,
    XZ_EXTENSION,   TXZ_EXTENSION,
    LZ4_EXTENSION,  TLZ4_EXTENSION,
    NULL   /* end of list */
};
/*! FIO_compressFilename_srcFile() :
* @return : 0 : compression completed correctly,
@ -1451,6 +1471,15 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
return 1;
}
/* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
* YES => ZSTD will skip compression of the file and will return 0.
* NO => ZSTD will resume with compress operation.
*/
if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
return 0;
}
ress.srcFile = FIO_openSrcFile(srcFileName);
if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */

View File

@ -93,6 +93,7 @@ void FIO_setLiteralCompressionMode(
void FIO_setNoProgress(unsigned noProgress);
void FIO_setNotificationLevel(int level);
/* FIO_setExcludeCompressedFile() :
 * stores `excludeCompressedFiles` into `prefs`;
 * set to 1 by the command line when --exclude-compressed is given */
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
/*-*************************************
* Single File functions

View File

@ -326,6 +326,27 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
#endif /* #ifdef _WIN32 */
/* UTIL_isCompressedFile() :
 * Looks up the extension of `inputName`, as reported by UTIL_getFileExtension(),
 * in `extensionList`, which must end with a NULL entry.
 * The comparison is an exact strcmp() match.
 * @return : 1 if one entry matches, 0 otherwise.
 */
int UTIL_isCompressedFile(const char *inputName, const char *extensionList[])
{
    const char* const fileExt = UTIL_getFileExtension(inputName);
    const char** candidate;
    for (candidate = extensionList; *candidate != NULL; ++candidate) {
        if (strcmp(fileExt, *candidate) == 0) return 1;
    }
    return 0;
}
/* UTIL_getFileExtension() :
 * Returns the extension of the file designated by `infilename`, leading '.' included.
 * @return : a pointer into `infilename`, positioned on its last '.',
 *           or an empty string "" when :
 *           - `infilename` contains no '.',
 *           - the last '.' is the very first character of `infilename` (ex: ".profile"),
 *           - the last '.' is located before the last path separator,
 *             meaning it belongs to a directory name (ex: "dir.d/file", "../file").
 *           The result is never NULL and must not be freed.
 */
const char* UTIL_getFileExtension(const char* infilename)
{
    const char* const extension = strrchr(infilename, '.');
    const char* lastSep = strrchr(infilename, '/');
#ifdef _WIN32
    {   /* '\\' is also a valid separator on Windows : keep whichever comes last */
        const char* const lastBackslash = strrchr(infilename, '\\');
        if (lastBackslash != NULL && (lastSep == NULL || lastBackslash > lastSep))
            lastSep = lastBackslash;
    }
#endif
    if (extension == NULL || extension == infilename) return "";
    /* a '.' found before the last separator is part of a directory name, not an extension */
    if (lastSep != NULL && extension < lastSep) return "";
    return extension;
}
/*
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).

View File

@ -40,7 +40,6 @@ extern "C" {
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
#include "mem.h" /* U32, U64 */
/*-************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
@ -135,6 +134,8 @@ U32 UTIL_isDirectory(const char* infilename);
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
int UTIL_isSameFile(const char* file1, const char* file2);
int UTIL_compareStr(const void *p1, const void *p2);
/* UTIL_isCompressedFile() :
 * @return : 1 if the extension of `infilename` (see UTIL_getFileExtension())
 *           is equal to one of the strings of `extensionList`, 0 otherwise.
 * `extensionList` must be terminated by a NULL entry. */
int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
/* UTIL_getFileExtension() :
 * @return : the extension of `infilename`, starting at its '.',
 *           or an empty string "" when no extension is found. Never NULL. */
const char* UTIL_getFileExtension(const char* infilename);
U32 UTIL_isFIFO(const char* infilename);
U32 UTIL_isLink(const char* infilename);

View File

@ -136,6 +136,7 @@ static int usage_advanced(const char* programName)
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
DISPLAY( " -c : force write to standard output, even if it is the console\n");
DISPLAY( " -l : print information about zstd compressed files \n");
DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n");
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
@ -708,7 +709,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
/* long commands with arguments */
#ifndef ZSTD_NODICT
if (longCommandWArg(&argument, "--train-cover")) {

View File

@ -215,6 +215,37 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t
$ZSTD -b --fast=1 -i0e1 tmp --compress-literals
$ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals
println "test: --exclude-compressed flag"
# Start from a fresh directory holding two uncompressed inputs.
rm -rf precompressedFilterTestDir
mkdir -p precompressedFilterTestDir
./datagen $size > precompressedFilterTestDir/input.5
./datagen $size > precompressedFilterTestDir/input.6
# First pass: input.5 and input.6 get compressed, sources are removed (--rm).
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
# Leave a gap so that files written by the second pass carry a later timestamp.
sleep 5
./datagen $size > precompressedFilterTestDir/input.7
./datagen $size > precompressedFilterTestDir/input.8
# Second pass: only the new inputs may be compressed; the .zst files must be skipped.
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
test ! -f precompressedFilterTestDir/input.5.zst.zst
test ! -f precompressedFilterTestDir/input.6.zst.zst
file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s`
file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s`
# Use the POSIX `[ ]` test (not the bash-only `[[ ]]`) with quoted operands,
# so the comparison is evaluated the same way by any sh.
if [ "$file2timestamp" -ge "$file1timestamp" ]; then
println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again."
else
println "Test is not successful"
fi
#File Extension check.
./datagen $size > precompressedFilterTestDir/input.zstbar
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
#ZSTD should compress input.zstbar
test -f precompressedFilterTestDir/input.zstbar.zst
#Check without the --exclude-compressed flag
$ZSTD --long --rm -r precompressedFilterTestDir
#Files should get compressed again without the --exclude-compressed flag.
test -f precompressedFilterTestDir/input.5.zst.zst
test -f precompressedFilterTestDir/input.6.zst.zst
println "Test completed"
println "test : file removal"
$ZSTD -f --rm tmp
test ! -f tmp # tmp should no longer be present