Merge pull request #1749 from facebook/rmadapt

removed adaptive-compression
This commit is contained in:
Yann Collet 2019-08-28 12:26:29 -07:00 committed by GitHub
commit 9589e8e4bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 0 additions and 1741 deletions

View File

@ -99,7 +99,6 @@ man:
contrib: lib
$(MAKE) -C contrib/pzstd all
$(MAKE) -C contrib/seekable_format/examples all
$(MAKE) -C contrib/adaptive-compression all
$(MAKE) -C contrib/largeNbDicts all
.PHONY: cleanTabs
@ -116,7 +115,6 @@ clean:
@$(MAKE) -C contrib/gen_html $@ > $(VOID)
@$(MAKE) -C contrib/pzstd $@ > $(VOID)
@$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
@$(MAKE) -C contrib/adaptive-compression $@ > $(VOID)
@$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
@$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
@$(RM) -r lz4

View File

@ -1,3 +0,0 @@
# binaries generated
adapt
datagen

View File

@ -1,76 +0,0 @@
# Locations of the zstd library and of the CLI program sources,
# relative to this directory.
ZSTDDIR = ../../lib
PRGDIR = ../../programs

# Library sources. The *.c patterns are stored unexpanded; make expands
# wildcards when these variables appear in a prerequisite list.
ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c
ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)

# Linker flag for thread support, and the debug options appended to CFLAGS.
MULTITHREAD_LDFLAGS = -pthread
DEBUGFLAGS = -g -DZSTD_DEBUG=1

# Header search path: library directories plus the programs directory.
CPPFLAGS += -I$(ZSTDDIR) \
            -I$(ZSTDDIR)/common \
            -I$(ZSTDDIR)/compress \
            -I$(ZSTDDIR)/dictBuilder \
            -I$(ZSTDDIR)/deprecated \
            -I$(PRGDIR)

# Optimisation level is user-overridable; warnings are always appended.
CFLAGS ?= -O3
CFLAGS += -Wall \
          -Wextra \
          -Wcast-qual \
          -Wcast-align \
          -Wshadow \
          -Wstrict-aliasing=1 \
          -Wswitch-enum \
          -Wdeclaration-after-statement \
          -Wstrict-prototypes \
          -Wundef \
          -Wvla \
          -Wformat=2 \
          -Winit-self \
          -Wfloat-equal \
          -Wwrite-strings \
          -Wredundant-decls
# MOREFLAGS is an extra hook for additional compiler options.
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)

# Everything handed to $(CC): preprocessor, compiler and linker flags together.
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
# Command-style targets: none of these names is a file created by its recipe,
# so they are declared phony to keep a stray file called e.g. `clean` or `all`
# from masking them. `adapt-debug` is listed because its recipe writes the
# binary `adapt`, not a file named `adapt-debug`.
.PHONY: all adapt-debug test-adapt-correctness test-adapt-performance clean

# Default goal: build both binaries.
all: adapt datagen

# Compile and link the adaptive compression tool in a single $(CC) invocation.
adapt: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
	$(CC) $(FLAGS) $^ -o $@

# Same sources as `adapt`, built with -DDEBUG_MODE=2; overwrites `adapt`.
adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
	$(CC) $(FLAGS) -DDEBUG_MODE=2 $^ -o adapt

# Data generator used by the test scripts.
datagen : $(PRGDIR)/datagen.c datagencli.c
	$(CC) $(FLAGS) $^ -o $@

# Run the test scripts from the current directory; both need the two binaries.
test-adapt-correctness: datagen adapt
	@./test-correctness.sh
	@echo "test correctness complete"

test-adapt-performance: datagen adapt
	@./test-performance.sh
	@echo "test performance complete"

# Remove the binaries, debug symbol bundles and temporary test files.
clean:
	@$(RM) -f adapt datagen
	@$(RM) -rf *.dSYM
	@$(RM) -f tmp*
	@$(RM) -f tests/*.zst
	@$(RM) -f tests/tmp*
	@echo "finished cleaning"
#-----------------------------------------------------------------------------
# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
#-----------------------------------------------------------------------------
# Query the system name once at parse time instead of once per conditional.
UNAME := $(shell uname)

ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))

# install/uninstall are commands, not files.
.PHONY: install uninstall

# Default install program: `ginstall` on SunOS, `install` everywhere else.
ifneq (,$(filter $(UNAME),SunOS))
INSTALL ?= ginstall
else
INSTALL ?= install
endif

# User-overridable installation layout.
PREFIX ?= /usr/local
DESTDIR ?=
BINDIR ?= $(PREFIX)/bin
INSTALL_PROGRAM ?= $(INSTALL) -m 755

# Install the `adapt` binary under the name `zstd-adaptive`.
install: adapt
	@echo Installing binaries
	@$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/
	@$(INSTALL_PROGRAM) adapt $(DESTDIR)$(BINDIR)/zstd-adaptive
	@echo zstd-adaptive installation completed

# Remove the installed `zstd-adaptive` binary.
uninstall:
	@$(RM) $(DESTDIR)$(BINDIR)/zstd-adaptive
	@echo zstd-adaptive programs successfully uninstalled

endif

View File

@ -1,91 +0,0 @@
### Summary
`adapt` is a new compression tool targeted at optimizing performance across network connections and pipelines. The tool is aimed at sensing network speeds and adapting compression level based on network or pipe speeds.
In situations where the compression level does not appropriately match the network/pipe speed, compression may be bottlenecking the entire pipeline or the files may not be compressed as much as they potentially could be, therefore losing efficiency. It also becomes quite impractical to manually measure and set an optimal compression level (which could potentially change over time).
### Using `adapt`
In order to build and use the tool, you can simply run `make adapt` in the `adaptive-compression` directory under `contrib`. This will generate an executable available for use. Another possible method of installation is running `make install`, which will create and install the binary as the command `zstd-adaptive`.
Similar to many other compression utilities, `zstd-adaptive` can be invoked by using the following format:
`zstd-adaptive [options] [file(s)]`
Supported options for the above format are described below.
`zstd-adaptive` also supports reading from `stdin` and writing to `stdout`, which is potentially more useful. By default, if no files are given, `zstd-adaptive` reads from and writes to standard I/O. Therefore, you can simply insert it within a pipeline like so:
`cat FILE | zstd-adaptive | ssh "cat - > tmp.zst"`
If a file is provided, it is also possible to force writing to stdout using the `-c` flag like so:
`zstd-adaptive -c FILE | ssh "cat - > tmp.zst"`
Several options described below can be used to control the behavior of `zstd-adaptive`. More specifically, using the `-l#` and `-u#` flags will set lower and upper bounds so that the compression level will always be within that range. The `-i#` flag can also be used to change the initial compression level. If an initial compression level is not provided, the initial compression level will be chosen such that it is within the appropriate range (it becomes equal to the lower bound).
### Options
`-oFILE` : write output to `FILE`
`-i#` : provide initial compression level (must be within the appropriate bounds)
`-h` : display help/information
`-f` : force the compression level to stay constant
`-c` : force write to `stdout`
`-p` : hide progress bar
`-q` : quiet mode -- do not show progress bar or other information
`-l#` : set a lower bound on the compression level (default is 1)
`-u#` : set an upper bound on the compression level (default is 22)
### Benchmarking / Test results
#### Artificial Tests
These artificial tests were run by using the `pv` command line utility in order to limit pipe speeds (25 MB/s read and 5 MB/s write limits were chosen to mimic severe throughput constraints). A 40 GB backup file was sent through a pipeline, compressed, and written out to a file. Compression time, size, and ratio were computed. Data for `zstd -15` was excluded from these tests because the test runs quite long.
<table>
<tr><th> 25 MB/s read limit </th></tr>
<tr><td>
| Compressor Name | Ratio | Compressed Size | Compression Time |
|:----------------|------:|----------------:|-----------------:|
| zstd -3 | 2.108 | 20.718 GB | 29m 48.530s |
| zstd-adaptive | 2.230 | 19.581 GB | 29m 48.798s |
</td></tr>
</table>
<table>
<tr><th> 5 MB/s write limit </th></tr>
<tr><td>
| Compressor Name | Ratio | Compressed Size | Compression Time |
|:----------------|------:|----------------:|-----------------:|
| zstd -3 | 2.108 | 20.718 GB | 1h 10m 43.076s |
| zstd-adaptive | 2.249 | 19.412 GB | 1h 06m 15.577s |
</td></tr>
</table>
The commands used for this test generally followed the form:
`cat FILE | pv -L 25m -q | COMPRESSION | pv -q > tmp.zst # impose 25 MB/s read limit`
`cat FILE | pv -q | COMPRESSION | pv -L 5m -q > tmp.zst # impose 5 MB/s write limit`
#### SSH Tests
The following tests were performed by piping a relatively large backup file (approximately 80 GB) through compression and over SSH to be stored on a server. The test data includes statistics for time and compressed size on `zstd` at several compression levels, as well as `zstd-adaptive`. The data highlights the potential advantages that `zstd-adaptive` has over using a low static compression level and the negative impacts that using an excessively high static compression level can have on pipe throughput.
| Compressor Name | Ratio | Compressed Size | Compression Time |
|:----------------|------:|----------------:|-----------------:|
| zstd -3 | 2.212 | 32.426 GB | 1h 17m 59.756s |
| zstd -15 | 2.374 | 30.213 GB | 2h 56m 59.441s |
| zstd-adaptive | 2.315 | 30.993 GB | 1h 18m 52.860s |
The commands used for this test generally followed the form:
`cat FILE | COMPRESSION | ssh dev "cat - > tmp.zst"`

File diff suppressed because it is too large Load Diff

View File

@ -1,129 +0,0 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
/*-************************************
* Dependencies
**************************************/
#include "util.h" /* Compiler options */
#include <stdio.h> /* fprintf, stderr */
#include "datagen.h" /* RDG_generate */
/*-************************************
* Constants
**************************************/
/* Postfix unit macros : written after a number, e.g. `64 KB` expands to `64 *(1 <<10)` */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
/* Default amount of generated data (option -g) : 64 KB plus one byte */
#define SIZE_DEFAULT ((64 KB) + 1)
/* Default generator seed (option -s) */
#define SEED_DEFAULT 0
/* Default compressibility, in percent (option -P) */
#define COMPRESSIBILITY_DEFAULT 50
/*-************************************
* Macros
**************************************/
/* DISPLAY() : printf-style message written to stderr */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL() : emit the message only when displayLevel is at least `l`.
 * Wrapped in do { } while (0) so that it expands to exactly one statement :
 * it can then be followed by ';' and used as the body of an if/else
 * without capturing the `else` or leaving a stray empty statement. */
#define DISPLAYLEVEL(l, ...) do { if (displayLevel>=(l)) { DISPLAY(__VA_ARGS__); } } while (0)
/* Current verbosity; messages with a level above this value are suppressed */
static unsigned displayLevel = 2;
/*-*******************************************************
* Command line
*********************************************************/
/* usage() :
 * Print the command-line help on stderr, including the default values
 * of the -g, -s and -P options.
 * programName : name displayed in the usage line.
 * @return : always 0 */
static int usage(const char* programName)
{
    /* one call; adjacent literals are concatenated by the compiler */
    DISPLAY( "Compressible data generator\n"
             "Usage :\n"
             " %s [args]\n"
             "\n"
             "Arguments :\n"
             " -g# : generate # data (default:%i)\n"
             " -s# : Select seed (default:%i)\n"
             " -P# : Select compressibility in %% (default:%i%%)\n"
             " -h : display help and exit\n",
             programName,
             SIZE_DEFAULT,
             SEED_DEFAULT,
             COMPRESSIBILITY_DEFAULT);
    return 0;
}
/* main() :
 * Parse the command line, then generate data through RDG_genStdout().
 * Options may be aggregated after a single '-' (e.g. "-vg1MB") :
 *   -g# : amount of data; optional suffixes K, M, G (left shifts by
 *         10, 20, 30 bits) and an optional trailing B
 *   -s# : seed
 *   -P# : compressibility in percent, capped at 100
 *   -L# : hidden, literal distribution probability in percent, capped at 100
 *   -v  : verbose, sets displayLevel to 4
 *   -h  : print usage and exit
 * Any other option letter prints usage and exits.
 * Arguments which do not start with '-' are ignored.
 * @return : 0 in all cases (usage() returns 0 too) */
int main(int argc, const char** argv)
{
    unsigned probaU32 = COMPRESSIBILITY_DEFAULT;
    double litProba = 0.0;
    U64 size = SIZE_DEFAULT;
    U32 seed = SEED_DEFAULT;
    const char* const programName = argv[0];

    int argNb;
    for(argNb=1; argNb<argc; argNb++) {
        const char* argument = argv[argNb];

        if(!argument) continue;   /* Protection if argument empty */

        /* Handle commands. Aggregated commands are allowed */
        if (*argument=='-') {
            argument++;
            while (*argument!=0) {
                switch(*argument)
                {
                case 'h':
                    return usage(programName);
                case 'g':
                    argument++;
                    size=0;
                    while ((*argument>='0') && (*argument<='9'))
                        size *= 10, size += *argument++ - '0';
                    /* unit suffixes; each matching letter is consumed in turn */
                    if (*argument=='K') { size <<= 10; argument++; }
                    if (*argument=='M') { size <<= 20; argument++; }
                    if (*argument=='G') { size <<= 30; argument++; }
                    if (*argument=='B') { argument++; }
                    break;
                case 's':
                    argument++;
                    seed=0;
                    while ((*argument>='0') && (*argument<='9'))
                        seed *= 10, seed += *argument++ - '0';
                    break;
                case 'P':
                    argument++;
                    probaU32 = 0;
                    while ((*argument>='0') && (*argument<='9'))
                        probaU32 *= 10, probaU32 += *argument++ - '0';
                    if (probaU32>100) probaU32 = 100;
                    break;
                case 'L':   /* hidden argument : Literal distribution probability */
                    argument++;
                    litProba=0.;
                    while ((*argument>='0') && (*argument<='9'))
                        litProba *= 10, litProba += *argument++ - '0';
                    if (litProba>100.) litProba=100.;
                    litProba /= 100.;   /* percent -> fraction */
                    break;
                case 'v':
                    displayLevel = 4;
                    argument++;
                    break;
                default:
                    return usage(programName);
                }
    }   }   }   /* for(argNb=1; argNb<argc; argNb++) */

    DISPLAYLEVEL(4, "Compressible data Generator \n");
    /* braces keep the macro safely nested whatever its expansion is */
    if (probaU32!=COMPRESSIBILITY_DEFAULT) {
        /* probaU32 is unsigned : print it with %u, like the seed below */
        DISPLAYLEVEL(3, "Compressibility : %u%%\n", probaU32);
    }
    DISPLAYLEVEL(3, "Seed = %u \n", (unsigned)seed);

    /* compressibility is passed as a fraction of 1 */
    RDG_genStdout(size, (double)probaU32/100, litProba, seed);
    DISPLAYLEVEL(1, "\n");

    return 0;
}

View File

@ -1,252 +0,0 @@
# Round-trip correctness tests for ./adapt.
#
# Each case generates data with ./datagen, compresses it with ./adapt in one
# of several pipeline configurations, decompresses the result with zstd and
# compares it with the original data. Uses ./datagen, ./adapt, zstd and pv.
#
# Every mismatch is counted; the script exits with status 1 when at least one
# case failed, so that a caller (such as a make target) can detect it.

failures=0

# section TITLE
# Print a section header preceded by a blank line. printf is used because
# `echo -e` is not portable across /bin/sh implementations.
section() {
    printf '\n%s\n' "$1"
}

# roundtrip MODE SEED SIZE
# Generate SIZE bytes with seed SEED into `tmp`, compress to `tmp.zst` using
# the pipeline selected by MODE, decompress to `tmp2`, compare, then remove
# all tmp* files.
roundtrip() {
    ./datagen -s"$2" -g"$3" > tmp
    case "$1" in
        file)        ./adapt -otmp.zst tmp ;;
        stream)      cat tmp | ./adapt > tmp.zst ;;
        read-limit)  pv -L 50m -q tmp | ./adapt > tmp.zst ;;
        write-limit) pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst ;;
        both-limits) pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst ;;
        forced)      ./adapt tmp -otmp.zst -i11 -f ;;
    esac
    zstd -d tmp.zst -o tmp2
    # a missing or different tmp2 makes diff fail, which records the failure
    diff -s -q tmp tmp2 || failures=$((failures + 1))
    rm tmp*
}

# suite MODE FIRST_SEED SIZE...
# Run one roundtrip per SIZE, using consecutive seeds starting at FIRST_SEED.
suite() {
    suite_mode=$1
    suite_seed=$2
    shift 2
    for suite_size in "$@"; do
        roundtrip "$suite_mode" "$suite_seed" "$suite_size"
        suite_seed=$((suite_seed + 1))
    done
}

echo "correctness tests -- general"
suite file 1 1GB 500MB 250MB 125MB 50MB 25MB 10MB 5MB 500KB

section "correctness tests -- streaming"
suite stream 10 1GB 100MB 10MB 1MB 100KB 10KB

section "correctness tests -- read limit"
suite read-limit 16 1GB 100MB 10MB 1MB 100KB 10KB

section "correctness tests -- write limit"
suite write-limit 22 1GB 100MB 10MB 1MB 100KB 10KB

section "correctness tests -- read and write limits"
suite both-limits 28 1GB 100MB 10MB 1MB 100KB 10KB

section "correctness tests -- forced compression level"
suite forced 34 1GB 100MB 10MB 1MB 100KB 10KB

# Only checks that the compressed stream can be decompressed;
# there is no reference file to compare against.
section "correctness tests -- window size test"
./datagen -s39 -g1GB | pv -L 25m -q | ./adapt -i1 | pv -q > tmp.zst
zstd -d tmp.zst || failures=$((failures + 1))
rm tmp*

# These two runs only exercise the -u / -l options; their output is not verified.
section "correctness tests -- testing bounds"
./datagen -s40 -g1GB | pv -L 25m -q | ./adapt -i1 -u4 | pv -q > tmp.zst
rm tmp*
./datagen -s41 -g1GB | ./adapt -i14 -l4 > tmp.zst
rm tmp*

# Runs the `clean` target of the Makefile in the current directory.
make clean

if [ "$failures" -ne 0 ]; then
    echo "correctness tests: $failures failure(s)" >&2
    exit 1
fi

View File

@ -1,59 +0,0 @@
# Performance comparison between ./adapt and the zstd command line tool.
#
# Each case generates data with ./datagen into `tmp`, compresses it once with
# ./adapt (to tmp1.zst) and once with zstd (to tmp2.zst) under `time`, and
# optionally lists both outputs so their sizes can be compared.

# section TITLE
# Print a section header preceded by a blank line. printf is used because
# `echo -e` is not portable across /bin/sh implementations.
section() {
    printf '\n%s\n' "$1"
}

# bench_default SEED SIZE [sizes]
# Time ./adapt with its default settings against `zstd -1` on SIZE bytes of
# data generated with seed SEED. When the third argument is `sizes`, also
# list both compressed files.
bench_default() {
    ./datagen -s"$1" -g"$2" > tmp
    time ./adapt -otmp1.zst tmp
    time zstd -1 -o tmp2.zst tmp
    if [ "${3:-}" = sizes ]; then
        ls -l tmp1.zst tmp2.zst
    fi
    rm tmp*
}

# bench_level SEED LEVEL
# Time ./adapt forced (-f) to compression level LEVEL against `zstd -LEVEL`
# on 1GB of data generated with seed SEED, then list both compressed files.
bench_level() {
    ./datagen -s"$1" -g1GB > tmp
    echo "adapt"
    time ./adapt -i"$2" -f tmp -otmp1.zst
    echo "zstdcli"
    time zstd -"$2" tmp -o tmp2.zst
    ls -l tmp1.zst tmp2.zst
    rm tmp*
}

echo "testing time -- no limits set"
bench_default 1 1GB
bench_default 2 2GB
bench_default 3 4GB

section "testing compression ratio -- no limits set"
bench_default 4 1GB sizes
bench_default 5 2GB sizes
bench_default 6 4GB sizes

section "testing performance at various compression levels -- no limits set"
bench_level 7 5
bench_level 8 10
bench_level 9 15