mirror of
https://github.com/facebook/zstd.git
synced 2024-11-24 03:16:48 +08:00
largeNbDicts compatible with multiple source files
Splitting is disabled by default, but can be re-enabled using the usual command -B#. Commands were updated to look like zstd ones.
This commit is contained in:
parent
a5a77965d3
commit
4086b2871b
@ -16,7 +16,7 @@ CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)
|
||||
|
||||
CFLAGS ?= -O3
|
||||
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls
|
||||
|
@ -35,12 +35,22 @@
|
||||
#define KB *(1<<10)
|
||||
#define MB *(1<<20)
|
||||
|
||||
#define BLOCKSIZE_DEFAULT (4 KB)
|
||||
#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
|
||||
#define DICTSIZE (4 KB)
|
||||
#define CLEVEL_DEFAULT 3
|
||||
|
||||
#define BENCH_TIME_DEFAULT_S 6
|
||||
#define RUN_TIME_DEFAULT_MS 1000
|
||||
#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
|
||||
|
||||
#define DISPLAY_LEVEL_DEFAULT 3
|
||||
|
||||
#define BENCH_SIZE_MAX (1200 MB)
|
||||
|
||||
|
||||
/*--- Macros ---*/
|
||||
#define CONTROL(c) assert(c)
|
||||
|
||||
|
||||
/*--- Display Macros ---*/
|
||||
|
||||
@ -59,12 +69,17 @@ typedef struct {
|
||||
|
||||
static const buffer_t kBuffNull = { NULL, 0, 0 };
|
||||
|
||||
|
||||
static buffer_t fillBuffer_fromHandle(buffer_t buff, FILE* f)
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t createBuffer(size_t capacity)
|
||||
{
|
||||
size_t const readSize = fread(buff.ptr, 1, buff.capacity, f);
|
||||
buff.size = readSize;
|
||||
return buff;
|
||||
void* const ptr = malloc(capacity);
|
||||
if (ptr==NULL) return kBuffNull;
|
||||
|
||||
buffer_t buffer;
|
||||
buffer.ptr = ptr;
|
||||
buffer.capacity = capacity;
|
||||
buffer.size = 0;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static void freeBuffer(buffer_t buff)
|
||||
@ -72,20 +87,24 @@ static void freeBuffer(buffer_t buff)
|
||||
free(buff.ptr);
|
||||
}
|
||||
|
||||
|
||||
/* fillBuffer_fromHandle() :
 * reads up to `buff->capacity` bytes from `f` into `buff->ptr`,
 * and stores the nb of bytes actually read into `buff->size`.
 * A short read is not reported : `buff->size` is simply smaller than `buff->capacity`. */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    buff->size = fread(buff->ptr, 1, buff->capacity, f);
}
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t createBuffer_fromHandle(FILE* f, size_t bufferSize)
|
||||
{
|
||||
void* const buffer = malloc(bufferSize);
|
||||
if (buffer==NULL) return kBuffNull;
|
||||
|
||||
{ buffer_t buff = { buffer, 0, bufferSize };
|
||||
buff = fillBuffer_fromHandle(buff, f);
|
||||
if (buff.size != buff.capacity) {
|
||||
freeBuffer(buff);
|
||||
return kBuffNull;
|
||||
}
|
||||
return buff;
|
||||
buffer_t buff = createBuffer(bufferSize);
|
||||
if (buff.ptr == NULL) return kBuffNull;
|
||||
fillBuffer_fromHandle(&buff, f);
|
||||
if (buff.size != buff.capacity) {
|
||||
freeBuffer(buff);
|
||||
return kBuffNull;
|
||||
}
|
||||
return buff;
|
||||
}
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
@ -107,78 +126,14 @@ static buffer_t createBuffer_fromFile(const char* fileName)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*--- buffer_collection_t ---*/
|
||||
|
||||
typedef struct {
|
||||
void** buffers;
|
||||
size_t* capacities;
|
||||
size_t nbBuffers;
|
||||
} buffer_collection_t;
|
||||
|
||||
static const buffer_collection_t kNullCollection = { NULL, NULL, 0 };
|
||||
|
||||
static void freeCollection(buffer_collection_t collection)
|
||||
static buffer_t
|
||||
createDictionaryBuffer(const char* dictionaryName,
|
||||
const void* srcBuffer,
|
||||
const size_t* srcBlockSizes, unsigned nbBlocks)
|
||||
{
|
||||
free(collection.buffers);
|
||||
free(collection.capacities);
|
||||
}
|
||||
|
||||
/* returns .buffers=NULL if operation fails */
|
||||
buffer_collection_t splitBuffer(buffer_t srcBuffer, size_t blockSize)
|
||||
{
|
||||
size_t const nbBlocks = (srcBuffer.size + (blockSize-1)) / blockSize;
|
||||
|
||||
void** const buffers = malloc(nbBlocks * sizeof(void*));
|
||||
size_t* const capacities = malloc(nbBlocks * sizeof(size_t*));
|
||||
if ((buffers==NULL) || capacities==NULL) {
|
||||
free(buffers);
|
||||
free(capacities);
|
||||
return kNullCollection;
|
||||
}
|
||||
|
||||
char* newBlockPtr = (char*)srcBuffer.ptr;
|
||||
char* const srcEnd = newBlockPtr + srcBuffer.size;
|
||||
assert(nbBlocks >= 1);
|
||||
for (size_t blockNb = 0; blockNb < nbBlocks-1; blockNb++) {
|
||||
buffers[blockNb] = newBlockPtr;
|
||||
capacities[blockNb] = blockSize;
|
||||
newBlockPtr += blockSize;
|
||||
}
|
||||
|
||||
/* last block */
|
||||
assert(newBlockPtr <= srcEnd);
|
||||
size_t const lastBlockSize = (srcEnd - newBlockPtr);
|
||||
buffers[nbBlocks-1] = newBlockPtr;
|
||||
capacities[nbBlocks-1] = lastBlockSize;
|
||||
|
||||
buffer_collection_t result;
|
||||
result.buffers = buffers;
|
||||
result.capacities = capacities;
|
||||
result.nbBuffers = nbBlocks;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* shrinkSizes() :
|
||||
* update sizes in buffer collection */
|
||||
void shrinkSizes(buffer_collection_t collection,
|
||||
const size_t* sizes) /* presumed same size as collection */
|
||||
{
|
||||
size_t const nbBlocks = collection.nbBuffers;
|
||||
for (size_t blockNb = 0; blockNb < nbBlocks; blockNb++) {
|
||||
assert(sizes[blockNb] <= collection.capacities[blockNb]);
|
||||
collection.capacities[blockNb] = sizes[blockNb];
|
||||
}
|
||||
}
|
||||
|
||||
/*--- dictionary creation ---*/
|
||||
|
||||
buffer_t createDictionary(const char* dictionary,
|
||||
const void* srcBuffer, size_t* srcBlockSizes, unsigned nbBlocks)
|
||||
{
|
||||
if (dictionary) {
|
||||
DISPLAYLEVEL(3, "loading dictionary %s \n", dictionary);
|
||||
return createBuffer_fromFile(dictionary);
|
||||
if (dictionaryName) {
|
||||
DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
|
||||
return createBuffer_fromFile(dictionaryName);
|
||||
} else {
|
||||
DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n", DICTSIZE);
|
||||
void* const dictBuffer = malloc(DICTSIZE);
|
||||
@ -199,6 +154,229 @@ buffer_t createDictionary(const char* dictionary,
|
||||
}
|
||||
|
||||
|
||||
/*! BMK_loadFiles() :
|
||||
* Loads `buffer`, with content from files listed within `fileNamesTable`.
|
||||
* Fills `buffer` entirely.
|
||||
* @return : 0 on success, !=0 on error */
|
||||
static int loadFiles(void* buffer, size_t bufferSize,
|
||||
size_t* fileSizes,
|
||||
const char* const * fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
size_t pos = 0, totalSize = 0;
|
||||
|
||||
for (unsigned n=0; n<nbFiles; n++) {
|
||||
U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
|
||||
if (UTIL_isDirectory(fileNamesTable[n])) {
|
||||
fileSizes[n] = 0; fileSize = 0;
|
||||
continue;
|
||||
}
|
||||
if (fileSize == UTIL_FILESIZE_UNKNOWN) {
|
||||
fileSizes[n] = 0; fileSize = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
FILE* const f = fopen(fileNamesTable[n], "rb");
|
||||
assert(f!=NULL);
|
||||
|
||||
assert(pos <= bufferSize);
|
||||
assert(fileSize <= bufferSize - pos);
|
||||
|
||||
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
|
||||
assert(readSize == fileSize);
|
||||
pos += readSize;
|
||||
}
|
||||
fileSizes[n] = (size_t)fileSize;
|
||||
totalSize += (size_t)fileSize;
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
assert(totalSize == bufferSize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*--- slice_collection_t ---*/
|
||||
|
||||
/* slice_collection_t :
 * a table of `nbSlices` slices;
 * slice n starts at address `slicePtrs[n]` and spans `capacities[n]` bytes.
 * Only the two tables belong to the collection (see freeSliceCollection()). */
typedef struct {
|
||||
void** slicePtrs;
|
||||
size_t* capacities;
|
||||
size_t nbSlices;
|
||||
} slice_collection_t;
|
||||
|
||||
static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
|
||||
|
||||
/* freeSliceCollection() :
 * releases the two tables of the collection.
 * The memory that the slices point into is not freed here. */
static void freeSliceCollection(slice_collection_t collection)
{
    free(collection.capacities);
    free(collection.slicePtrs);
}
|
||||
|
||||
/* shrinkSizes() :
|
||||
* downsizes sizes of slices within collection, according to `newSizes`.
|
||||
* every `newSizes` entry must be <= than its corresponding collection size */
|
||||
void shrinkSizes(slice_collection_t collection,
|
||||
const size_t* newSizes) /* presumed same size as collection */
|
||||
{
|
||||
size_t const nbSlices = collection.nbSlices;
|
||||
for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
|
||||
assert(newSizes[blockNb] <= collection.capacities[blockNb]);
|
||||
collection.capacities[blockNb] = newSizes[blockNb];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* splitSlices() :
 * cuts every slice of `srcSlices` into consecutive blocks of at most `blockSize` bytes.
 * The last block of a slice receives the remainder, and can therefore be smaller.
 * blockSize==0 means "do not cut" : each non-empty slice becomes a single block.
 * Empty slices produce no block.
 * The result references the memory of `srcSlices` (nothing is copied);
 * only its two tables are allocated here, release them with freeSliceCollection().
 * @return : the new collection, or kNullCollection if an allocation fails
 *           (note : an allocation of 0 entries is allowed to return NULL too) */
slice_collection_t splitSlices(slice_collection_t srcSlices, size_t blockSize)
{
    if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */

    /* 1st pass : count blocks.
     * The count must follow exactly the same rule as the 2nd pass :
     * a slice of size S produces ceil(S / blockSize) blocks.
     * Using a division (instead of stepping a position) also rules out any wrap-around. */
    size_t nbBlocks = 0;
    for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
        size_t const capacity = srcSlices.capacities[ssnb];
        nbBlocks += capacity / blockSize + (capacity % blockSize != 0);
    }

    void** const sliceTable = (void**)malloc(nbBlocks * sizeof(*sliceTable));
    size_t* const capacities = (size_t*)malloc(nbBlocks * sizeof(*capacities));
    if (sliceTable == NULL || capacities == NULL) {
        free(sliceTable);
        free(capacities);
        return kNullCollection;
    }

    /* 2nd pass : register start and size of each block */
    size_t blockNb = 0;
    for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
        char* ptr = (char*)srcSlices.slicePtrs[ssnb];
        size_t remaining = srcSlices.capacities[ssnb];
        while (remaining > 0) {
            size_t const size = (remaining < blockSize) ? remaining : blockSize;
            sliceTable[blockNb] = ptr;
            capacities[blockNb] = size;
            blockNb++;
            ptr += size;
            remaining -= size;
        }
    }
    assert(blockNb == nbBlocks);

    slice_collection_t result;
    result.nbSlices = nbBlocks;
    result.slicePtrs = sliceTable;
    result.capacities = capacities;
    return result;
}
|
||||
|
||||
|
||||
/* sliceCollection_totalCapacity() :
 * @return : sum of the capacities of all slices within `sc` (0 for an empty collection) */
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
    size_t sum = 0;
    size_t idx = sc.nbSlices;
    /* unsigned additions : the visiting order does not change the result */
    while (idx > 0) {
        idx--;
        sum += sc.capacities[idx];
    }
    return sum;
}
|
||||
|
||||
|
||||
/* --- buffer collection --- */
|
||||
|
||||
/* buffer_collection_t :
 * one memory buffer, together with a collection of slices.
 * Both members are released together by freeBufferCollection(). */
typedef struct {
|
||||
buffer_t buffer;
|
||||
slice_collection_t slices;
|
||||
} buffer_collection_t;
|
||||
|
||||
|
||||
/* freeBufferCollection() :
 * releases both members of `bc` : the slice tables, and the buffer itself. */
static void freeBufferCollection(buffer_collection_t bc)
{
    freeSliceCollection(bc.slices);
    freeBuffer(bc.buffer);
}
|
||||
|
||||
|
||||
static buffer_collection_t
|
||||
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
|
||||
{
|
||||
size_t const bufferSize = sliceCollection_totalCapacity(sc);
|
||||
|
||||
buffer_t buffer = createBuffer(bufferSize);
|
||||
CONTROL(buffer.ptr != NULL);
|
||||
|
||||
size_t const nbSlices = sc.nbSlices;
|
||||
void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
|
||||
CONTROL(slices != NULL);
|
||||
|
||||
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
|
||||
CONTROL(capacities != NULL);
|
||||
|
||||
char* const ptr = (char*)buffer.ptr;
|
||||
size_t pos = 0;
|
||||
for (size_t n=0; n < nbSlices; n++) {
|
||||
capacities[n] = sc.capacities[n];
|
||||
slices[n] = ptr + pos;
|
||||
pos += capacities[n];
|
||||
}
|
||||
|
||||
buffer_collection_t result;
|
||||
result.buffer = buffer;
|
||||
result.slices.nbSlices = nbSlices;
|
||||
result.slices.capacities = capacities;
|
||||
result.slices.slicePtrs = slices;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_collection_t
|
||||
createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
|
||||
assert(totalSizeToLoad <= BENCH_SIZE_MAX);
|
||||
size_t const loadedSize = (size_t)totalSizeToLoad;
|
||||
void* const srcBuffer = malloc(loadedSize);
|
||||
assert(srcBuffer != NULL);
|
||||
|
||||
assert(nbFiles > 0);
|
||||
size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
|
||||
assert(fileSizes != NULL);
|
||||
|
||||
/* Load input buffer */
|
||||
int const errorCode = loadFiles(srcBuffer, loadedSize,
|
||||
fileSizes,
|
||||
fileNamesTable, nbFiles);
|
||||
assert(errorCode == 0);
|
||||
|
||||
void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
|
||||
assert(sliceTable != NULL);
|
||||
|
||||
char* const ptr = (char*)srcBuffer;
|
||||
size_t pos = 0;
|
||||
unsigned fileNb = 0;
|
||||
for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
|
||||
sliceTable[fileNb] = ptr + pos;
|
||||
pos += fileSizes[fileNb];
|
||||
}
|
||||
assert(pos == loadedSize);
|
||||
assert(fileNb == nbFiles);
|
||||
|
||||
|
||||
buffer_t buffer;
|
||||
buffer.ptr = srcBuffer;
|
||||
buffer.capacity = loadedSize;
|
||||
buffer.size = loadedSize;
|
||||
|
||||
slice_collection_t slices;
|
||||
slices.slicePtrs = sliceTable;
|
||||
slices.capacities = fileSizes;
|
||||
slices.nbSlices = nbFiles;
|
||||
|
||||
buffer_collection_t bc;
|
||||
bc.buffer = buffer;
|
||||
bc.slices = slices;
|
||||
return bc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*--- ddict_collection_t ---*/
|
||||
|
||||
typedef struct {
|
||||
@ -260,12 +438,12 @@ void shuffleDictionaries(ddict_collection_t dicts)
|
||||
* or 0 if error.
|
||||
*/
|
||||
static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
|
||||
buffer_collection_t dstBlockBuffers,
|
||||
buffer_collection_t srcBlockBuffers,
|
||||
slice_collection_t dstBlockBuffers,
|
||||
slice_collection_t srcBlockBuffers,
|
||||
ZSTD_CDict* cdict, int cLevel)
|
||||
{
|
||||
size_t const nbBlocks = srcBlockBuffers.nbBuffers;
|
||||
assert(dstBlockBuffers.nbBuffers == srcBlockBuffers.nbBuffers);
|
||||
size_t const nbBlocks = srcBlockBuffers.nbSlices;
|
||||
assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
|
||||
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
assert(cctx != NULL);
|
||||
@ -275,16 +453,16 @@ static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If pre
|
||||
size_t cBlockSize;
|
||||
if (cdict == NULL) {
|
||||
cBlockSize = ZSTD_compressCCtx(cctx,
|
||||
dstBlockBuffers.buffers[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.buffers[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
cLevel);
|
||||
} else {
|
||||
cBlockSize = ZSTD_compress_usingCDict(cctx,
|
||||
dstBlockBuffers.buffers[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.buffers[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
cdict);
|
||||
}
|
||||
assert(!ZSTD_isError(cBlockSize));
|
||||
CONTROL(!ZSTD_isError(cBlockSize));
|
||||
if (cSizes) cSizes[blockNb] = cBlockSize;
|
||||
totalCSize += cBlockSize;
|
||||
}
|
||||
@ -337,31 +515,32 @@ size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity
|
||||
}
|
||||
|
||||
|
||||
#define BENCH_TIME_DEFAULT_MS 6000
|
||||
#define RUN_TIME_DEFAULT_MS 1000
|
||||
|
||||
static int benchMem(buffer_collection_t dstBlocks,
|
||||
buffer_collection_t srcBlocks,
|
||||
ddict_collection_t dictionaries)
|
||||
static int benchMem(slice_collection_t dstBlocks,
|
||||
slice_collection_t srcBlocks,
|
||||
ddict_collection_t dictionaries,
|
||||
int nbRounds)
|
||||
{
|
||||
assert(dstBlocks.nbBuffers == srcBlocks.nbBuffers);
|
||||
assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
|
||||
|
||||
unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
|
||||
unsigned const total_time_ms = nbRounds * ms_per_round;
|
||||
|
||||
double bestSpeed = 0.;
|
||||
|
||||
BMK_timedFnState_t* const benchState =
|
||||
BMK_createTimedFnState(BENCH_TIME_DEFAULT_MS, RUN_TIME_DEFAULT_MS);
|
||||
BMK_createTimedFnState(total_time_ms, ms_per_round);
|
||||
decompressInstructions di = createDecompressInstructions(dictionaries);
|
||||
|
||||
for (;;) {
|
||||
BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState,
|
||||
decompress, &di,
|
||||
NULL, NULL,
|
||||
dstBlocks.nbBuffers,
|
||||
(const void* const *)srcBlocks.buffers, srcBlocks.capacities,
|
||||
dstBlocks.buffers, dstBlocks.capacities,
|
||||
dstBlocks.nbSlices,
|
||||
(const void* const *)srcBlocks.slicePtrs, srcBlocks.capacities,
|
||||
dstBlocks.slicePtrs, dstBlocks.capacities,
|
||||
NULL);
|
||||
CONTROL(BMK_isSuccessful_runOutcome(outcome));
|
||||
|
||||
assert(BMK_isSuccessful_runOutcome(outcome));
|
||||
BMK_runTime_t const result = BMK_extract_runTime(outcome);
|
||||
U64 const dTime_ns = result.nanoSecPerRun;
|
||||
double const dTime_sec = (double)dTime_ns / 1000000000;
|
||||
@ -381,65 +560,87 @@ static int benchMem(buffer_collection_t dstBlocks,
|
||||
}
|
||||
|
||||
|
||||
/* bench() :
|
||||
* fileName : file to load for benchmarking purpose
|
||||
* dictionary : optional (can be NULL), file to load as dictionary,
|
||||
/*! bench() :
|
||||
* fileName : file to load for benchmarking purpose
|
||||
* dictionary : optional (can be NULL), file to load as dictionary,
|
||||
* if none provided : will be calculated on the fly by the program.
|
||||
* @return : 0 is success, 1+ otherwise */
|
||||
int bench(const char* fileName, const char* dictionary,
|
||||
size_t blockSize, int clevel, unsigned nbDictMax)
|
||||
int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
const char* dictionary,
|
||||
size_t blockSize, int clevel, unsigned nbDictMax, int nbRounds)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
DISPLAYLEVEL(3, "loading %s... \n", fileName);
|
||||
buffer_t const srcBuffer = createBuffer_fromFile(fileName);
|
||||
assert(srcBuffer.ptr != NULL);
|
||||
DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
|
||||
buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
|
||||
CONTROL(srcs.buffer.ptr != NULL);
|
||||
buffer_t srcBuffer = srcs.buffer;
|
||||
size_t const srcSize = srcBuffer.size;
|
||||
DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
|
||||
(double)srcSize / (1 MB));
|
||||
|
||||
buffer_collection_t const srcBlockBuffers = splitBuffer(srcBuffer, blockSize);
|
||||
assert(srcBlockBuffers.buffers != NULL);
|
||||
unsigned const nbBlocks = (unsigned)srcBlockBuffers.nbBuffers;
|
||||
DISPLAYLEVEL(3, "split input into %u blocks of max size %u bytes \n",
|
||||
nbBlocks, (unsigned)blockSize);
|
||||
slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize);
|
||||
unsigned const nbBlocks = (unsigned)(srcSlices.nbSlices);
|
||||
DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
|
||||
if (blockSize)
|
||||
DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
|
||||
DISPLAYLEVEL(3, "\n");
|
||||
|
||||
size_t const dstBlockSize = ZSTD_compressBound(blockSize);
|
||||
size_t const dstBufferCapacity = nbBlocks * dstBlockSize;
|
||||
void* const dstPtr = malloc(dstBufferCapacity);
|
||||
assert(dstPtr != NULL);
|
||||
buffer_t dstBuffer;
|
||||
dstBuffer.ptr = dstPtr;
|
||||
dstBuffer.capacity = dstBufferCapacity;
|
||||
dstBuffer.size = dstBufferCapacity;
|
||||
|
||||
buffer_collection_t const dstBlockBuffers = splitBuffer(dstBuffer, dstBlockSize);
|
||||
assert(dstBlockBuffers.buffers != NULL);
|
||||
size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
|
||||
CONTROL(dstCapacities != NULL);
|
||||
size_t dstBufferCapacity = 0;
|
||||
for (size_t bnb=0; bnb<nbBlocks; bnb++) {
|
||||
dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
|
||||
dstBufferCapacity += dstCapacities[bnb];
|
||||
}
|
||||
|
||||
buffer_t dstBuffer = createBuffer(dstBufferCapacity);
|
||||
CONTROL(dstBuffer.ptr != NULL);
|
||||
|
||||
void** const sliceTable = (void**)malloc(nbBlocks * sizeof(*sliceTable));
|
||||
CONTROL(sliceTable != NULL);
|
||||
|
||||
{ char* const ptr = (char*)dstBuffer.ptr;
|
||||
size_t pos = 0;
|
||||
for (size_t snb=0; snb < nbBlocks; snb++) {
|
||||
sliceTable[snb] = ptr + pos;
|
||||
pos += dstCapacities[snb];
|
||||
} }
|
||||
|
||||
slice_collection_t dstSlices;
|
||||
dstSlices.capacities = dstCapacities;
|
||||
dstSlices.slicePtrs = sliceTable;
|
||||
dstSlices.nbSlices = nbBlocks;
|
||||
|
||||
|
||||
/* dictionary determination */
|
||||
buffer_t const dictBuffer = createDictionary(dictionary,
|
||||
buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
|
||||
srcBuffer.ptr,
|
||||
srcBlockBuffers.capacities, nbBlocks);
|
||||
assert(dictBuffer.ptr != NULL);
|
||||
srcSlices.capacities, nbBlocks);
|
||||
CONTROL(dictBuffer.ptr != NULL);
|
||||
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
|
||||
assert(cdict != NULL);
|
||||
CONTROL(cdict != NULL);
|
||||
|
||||
size_t const cTotalSizeNoDict = compressBlocks(NULL, dstBlockBuffers, srcBlockBuffers, NULL, clevel);
|
||||
assert(cTotalSizeNoDict != 0);
|
||||
size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
|
||||
CONTROL(cTotalSizeNoDict != 0);
|
||||
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
clevel,
|
||||
(double)srcSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
|
||||
|
||||
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
|
||||
assert(cSizes != NULL);
|
||||
CONTROL(cSizes != NULL);
|
||||
|
||||
size_t const cTotalSize = compressBlocks(cSizes, dstBlockBuffers, srcBlockBuffers, cdict, clevel);
|
||||
assert(cTotalSize != 0);
|
||||
size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
|
||||
CONTROL(cTotalSize != 0);
|
||||
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
(unsigned)dictBuffer.size,
|
||||
(double)srcSize / cTotalSize, (unsigned)cTotalSize);
|
||||
|
||||
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
|
||||
shrinkSizes(dstSlices, cSizes);
|
||||
|
||||
size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
|
||||
unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
|
||||
size_t const allDictMem = dictMem * nbDicts;
|
||||
@ -447,43 +648,31 @@ int bench(const char* fileName, const char* dictionary,
|
||||
nbDicts, (double)allDictMem / (1 MB));
|
||||
|
||||
ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
|
||||
assert(dictionaries.ddicts != NULL);
|
||||
CONTROL(dictionaries.ddicts != NULL);
|
||||
|
||||
shuffleDictionaries(dictionaries);
|
||||
// for (size_t u = 0; u < dictionaries.nbDDict; u++) DISPLAY("dict address : %p \n", dictionaries.ddicts[u]); /* check dictionary addresses */
|
||||
|
||||
void* const resultPtr = malloc(srcSize);
|
||||
assert(resultPtr != NULL);
|
||||
buffer_t resultBuffer;
|
||||
resultBuffer.ptr = resultPtr;
|
||||
resultBuffer.capacity = srcSize;
|
||||
resultBuffer.size = srcSize;
|
||||
buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
|
||||
CONTROL(resultCollection.buffer.ptr != NULL);
|
||||
|
||||
buffer_collection_t const resultBlockBuffers = splitBuffer(resultBuffer, blockSize);
|
||||
assert(resultBlockBuffers.buffers != NULL);
|
||||
|
||||
shrinkSizes(dstBlockBuffers, cSizes);
|
||||
|
||||
result = benchMem(resultBlockBuffers, dstBlockBuffers, dictionaries);
|
||||
result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
|
||||
|
||||
/* free all heap objects in reverse order */
|
||||
freeCollection(resultBlockBuffers);
|
||||
free(resultPtr);
|
||||
freeBufferCollection(resultCollection);
|
||||
freeDDictCollection(dictionaries);
|
||||
free(cSizes);
|
||||
ZSTD_freeCDict(cdict);
|
||||
freeBuffer(dictBuffer);
|
||||
freeCollection(dstBlockBuffers);
|
||||
freeSliceCollection(dstSlices);
|
||||
freeBuffer(dstBuffer);
|
||||
freeCollection(srcBlockBuffers);
|
||||
freeBuffer(srcBuffer);
|
||||
freeSliceCollection(srcSlices);
|
||||
freeBufferCollection(srcs);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* --- Command Line --- */
|
||||
|
||||
/*! readU32FromChar() :
|
||||
@ -533,33 +722,57 @@ int bad_usage(const char* exeName)
|
||||
DISPLAY (" bad usage : \n");
|
||||
DISPLAY (" %s filename [Options] \n", exeName);
|
||||
DISPLAY ("Options : \n");
|
||||
DISPLAY ("--clevel=# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
|
||||
DISPLAY ("--blockSize=# : cut input into blocks of size # (default: %u) \n", BLOCKSIZE_DEFAULT);
|
||||
DISPLAY ("--dictionary=# : use # as a dictionary (default: create one) \n");
|
||||
DISPLAY ("--nbDicts=# : set nb of dictionaries to # (default: one per block) \n");
|
||||
DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
|
||||
DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
|
||||
DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
|
||||
DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
|
||||
DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
|
||||
DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main (int argc, const char** argv)
|
||||
{
|
||||
int recursiveMode = 0;
|
||||
int nbRounds = BENCH_TIME_DEFAULT_S;
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc < 2) return bad_usage(exeName);
|
||||
const char* const fileName = argv[1];
|
||||
|
||||
const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
|
||||
assert(nameTable != NULL);
|
||||
unsigned nameIdx = 0;
|
||||
|
||||
const char* dictionary = NULL;
|
||||
int cLevel = CLEVEL_DEFAULT;
|
||||
size_t blockSize = BLOCKSIZE_DEFAULT;
|
||||
size_t nbDicts = 0; /* auto, 1 dict per block */
|
||||
size_t nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
|
||||
|
||||
for (int argNb = 2; argNb < argc ; argNb++) {
|
||||
for (int argNb = 1; argNb < argc ; argNb++) {
|
||||
const char* argument = argv[argNb];
|
||||
if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
|
||||
if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
|
||||
if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
|
||||
if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
|
||||
return bad_usage(exeName);
|
||||
if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
/* anything that's not a command is a filename */
|
||||
nameTable[nameIdx++] = argument;
|
||||
}
|
||||
|
||||
return bench(fileName, dictionary, blockSize, cLevel, nbDicts);
|
||||
const char** filenameTable = nameTable;
|
||||
unsigned nbFiles = nameIdx;
|
||||
char* buffer_containing_filenames = NULL;
|
||||
|
||||
if (recursiveMode) {
|
||||
#ifndef UTIL_HAS_CREATEFILELIST
|
||||
assert(0); /* missing capability, do not run */
|
||||
#endif
|
||||
filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
|
||||
}
|
||||
|
||||
return bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbRounds);
|
||||
}
|
||||
|
@ -951,8 +951,9 @@ static size_t BMK_findMaxMem(U64 requiredMem)
|
||||
* Loads `buffer` with content of files listed within `fileNamesTable`.
|
||||
* At most, fills `buffer` entirely. */
|
||||
static int BMK_loadFiles(void* buffer, size_t bufferSize,
|
||||
size_t* fileSizes, const char* const * const fileNamesTable,
|
||||
unsigned nbFiles, int displayLevel)
|
||||
size_t* fileSizes,
|
||||
const char* const * fileNamesTable, unsigned nbFiles,
|
||||
int displayLevel)
|
||||
{
|
||||
size_t pos = 0, totalSize = 0;
|
||||
unsigned n;
|
||||
@ -973,9 +974,10 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize,
|
||||
if (f==NULL) EXM_THROW_INT(10, "impossible to open file %s", fileNamesTable[n]);
|
||||
DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]);
|
||||
if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */
|
||||
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
|
||||
if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]);
|
||||
pos += readSize; }
|
||||
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
|
||||
if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]);
|
||||
pos += readSize;
|
||||
}
|
||||
fileSizes[n] = (size_t)fileSize;
|
||||
totalSize += (size_t)fileSize;
|
||||
fclose(f);
|
||||
|
@ -526,7 +526,10 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
|
||||
* After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
|
||||
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
|
||||
*/
|
||||
UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks)
|
||||
UTIL_STATIC const char**
|
||||
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
|
||||
char** allocatedBuffer, unsigned* allocatedNamesNb,
|
||||
int followLinks)
|
||||
{
|
||||
size_t pos;
|
||||
unsigned i, nbFiles;
|
||||
|
Loading…
Reference in New Issue
Block a user