mirror of
git://sourceware.org/git/bzip2.git
synced 2024-11-23 11:43:28 +08:00
bzip2-0.9.5d
This commit is contained in:
parent
977101ad5f
commit
f93cd82a9a
55
CHANGES
55
CHANGES
@ -43,3 +43,58 @@ In compress.c:
|
||||
do a bit better on small files. This _does_ affect
|
||||
bzip2.c.
|
||||
|
||||
|
||||
0.9.5a
|
||||
~~~~~~
|
||||
Major change: add a fallback sorting algorithm (blocksort.c)
|
||||
to give reasonable behaviour even for very repetitive inputs.
|
||||
Nuked --repetitive-best and --repetitive-fast since they are
|
||||
no longer useful.
|
||||
|
||||
Minor changes: mostly a whole bunch of small changes/
|
||||
bugfixes in the driver (bzip2.c). Changes pertaining to the
|
||||
user interface are:
|
||||
|
||||
allow decompression of symlink'd files to stdout
|
||||
decompress/test files even without .bz2 extension
|
||||
give more accurate error messages for I/O errors
|
||||
when compressing/decompressing to stdout, don't catch control-C
|
||||
read flags from BZIP2 and BZIP environment variables
|
||||
decline to break hard links to a file unless forced with -f
|
||||
allow -c flag even with no filenames
|
||||
preserve file ownerships as far as possible
|
||||
make -s -1 give the expected block size (100k)
|
||||
add a flag -q --quiet to suppress nonessential warnings
|
||||
stop decoding flags after --, so files beginning in - can be handled
|
||||
resolved inconsistent naming: bzcat or bz2cat ?
|
||||
bzip2 --help now returns 0
|
||||
|
||||
Programming-level changes are:
|
||||
|
||||
fixed syntax error in GET_LL4 for Borland C++ 5.02
|
||||
let bzBuffToBuffDecompress return BZ_DATA_ERROR{_MAGIC}
|
||||
fix overshoot of mode-string end in bzopen_or_bzdopen
|
||||
wrapped bzlib.h in #ifdef __cplusplus ... extern "C" { ... }
|
||||
close file handles under all error conditions
|
||||
added minor mods so it compiles with DJGPP out of the box
|
||||
fixed Makefile so it doesn't give problems with BSD make
|
||||
fix uninitialised memory reads in dlltest.c
|
||||
|
||||
0.9.5b
|
||||
~~~~~~
|
||||
Open stdin/stdout in binary mode for DJGPP.
|
||||
|
||||
0.9.5c
|
||||
~~~~~~
|
||||
Changed BZ_N_OVERSHOOT to be ... + 2 instead of ... + 1. The + 1
|
||||
version could cause the sorted order to be wrong in some extremely
|
||||
obscure cases. Also changed setting of quadrant in blocksort.c.
|
||||
|
||||
0.9.5d
|
||||
~~~~~~
|
||||
The only functional change is to make bzlibVersion() in the library
|
||||
return the correct string. This has no effect whatsoever on the
|
||||
functioning of the bzip2 program or library. Added a couple of casts
|
||||
so the library compiles without warnings at level 3 in MS Visual
|
||||
Studio 6.0. Included a Y2K statement in the file Y2K_INFO. All other
|
||||
changes are minor documentation changes.
|
||||
|
6
LICENSE
6
LICENSE
@ -1,6 +1,6 @@
|
||||
|
||||
This program, "bzip2" and associated library "libbzip2", are
|
||||
copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -33,7 +33,7 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0 of 28 June 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
|
89
Makefile
89
Makefile
@ -1,6 +1,7 @@
|
||||
|
||||
SHELL=/bin/sh
|
||||
CC=gcc
|
||||
CFLAGS=-Wall -O2 -fomit-frame-pointer -fno-strength-reduce
|
||||
CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce
|
||||
|
||||
OBJS= blocksort.o \
|
||||
huffman.o \
|
||||
@ -10,37 +11,93 @@ OBJS= blocksort.o \
|
||||
decompress.o \
|
||||
bzlib.o
|
||||
|
||||
all: lib bzip2 test
|
||||
all: libbz2.a bzip2 bzip2recover test
|
||||
|
||||
bzip2: lib
|
||||
$(CC) $(CFLAGS) -c bzip2.c
|
||||
bzip2: libbz2.a bzip2.o
|
||||
$(CC) $(CFLAGS) -o bzip2 bzip2.o -L. -lbz2
|
||||
$(CC) $(CFLAGS) -o bzip2recover bzip2recover.c
|
||||
|
||||
lib: $(OBJS)
|
||||
bzip2recover: bzip2recover.o
|
||||
$(CC) $(CFLAGS) -o bzip2recover bzip2recover.o
|
||||
|
||||
libbz2.a: $(OBJS)
|
||||
rm -f libbz2.a
|
||||
ar clq libbz2.a $(OBJS)
|
||||
ar cq libbz2.a $(OBJS)
|
||||
@if ( test -f /usr/bin/ranlib -o -f /bin/ranlib -o \
|
||||
-f /usr/ccs/bin/ranlib ) ; then \
|
||||
echo ranlib libbz2.a ; \
|
||||
ranlib libbz2.a ; \
|
||||
fi
|
||||
|
||||
test: bzip2
|
||||
@cat words1
|
||||
./bzip2 -1 < sample1.ref > sample1.rb2
|
||||
./bzip2 -2 < sample2.ref > sample2.rb2
|
||||
./bzip2 -d < sample1.bz2 > sample1.tst
|
||||
./bzip2 -d < sample2.bz2 > sample2.tst
|
||||
@cat words2
|
||||
./bzip2 -1 < sample1.ref > sample1.rb2
|
||||
./bzip2 -2 < sample2.ref > sample2.rb2
|
||||
./bzip2 -3 < sample3.ref > sample3.rb2
|
||||
./bzip2 -d < sample1.bz2 > sample1.tst
|
||||
./bzip2 -d < sample2.bz2 > sample2.tst
|
||||
./bzip2 -ds < sample3.bz2 > sample3.tst
|
||||
cmp sample1.bz2 sample1.rb2
|
||||
cmp sample2.bz2 sample2.rb2
|
||||
cmp sample3.bz2 sample3.rb2
|
||||
cmp sample1.tst sample1.ref
|
||||
cmp sample2.tst sample2.ref
|
||||
cmp sample3.tst sample3.ref
|
||||
@cat words3
|
||||
|
||||
PREFIX=/usr
|
||||
|
||||
install: bzip2 bzip2recover
|
||||
if ( test ! -d $(PREFIX)/bin ) ; then mkdir $(PREFIX)/bin ; fi
|
||||
if ( test ! -d $(PREFIX)/lib ) ; then mkdir $(PREFIX)/lib ; fi
|
||||
if ( test ! -d $(PREFIX)/man ) ; then mkdir $(PREFIX)/man ; fi
|
||||
if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir $(PREFIX)/man/man1 ; fi
|
||||
if ( test ! -d $(PREFIX)/include ) ; then mkdir $(PREFIX)/include ; fi
|
||||
cp -f bzip2 $(PREFIX)/bin/bzip2
|
||||
cp -f bzip2 $(PREFIX)/bin/bunzip2
|
||||
cp -f bzip2 $(PREFIX)/bin/bzcat
|
||||
cp -f bzip2recover $(PREFIX)/bin/bzip2recover
|
||||
chmod a+x $(PREFIX)/bin/bzip2
|
||||
chmod a+x $(PREFIX)/bin/bunzip2
|
||||
chmod a+x $(PREFIX)/bin/bzcat
|
||||
chmod a+x $(PREFIX)/bin/bzip2recover
|
||||
cp -f bzip2.1 $(PREFIX)/man/man1
|
||||
chmod a+r $(PREFIX)/man/man1/bzip2.1
|
||||
cp -f bzlib.h $(PREFIX)/include
|
||||
chmod a+r $(PREFIX)/include/bzlib.h
|
||||
cp -f libbz2.a $(PREFIX)/lib
|
||||
chmod a+r $(PREFIX)/lib/libbz2.a
|
||||
|
||||
clean:
|
||||
rm -f *.o libbz2.a bzip2 bzip2recover sample1.rb2 sample2.rb2 sample1.tst sample2.tst
|
||||
rm -f *.o libbz2.a bzip2 bzip2recover \
|
||||
sample1.rb2 sample2.rb2 sample3.rb2 \
|
||||
sample1.tst sample2.tst sample3.tst
|
||||
|
||||
.c.o: $*.o bzlib.h bzlib_private.h
|
||||
$(CC) $(CFLAGS) -c $*.c -o $*.o
|
||||
blocksort.o: blocksort.c
|
||||
$(CC) $(CFLAGS) -c blocksort.c
|
||||
huffman.o: huffman.c
|
||||
$(CC) $(CFLAGS) -c huffman.c
|
||||
crctable.o: crctable.c
|
||||
$(CC) $(CFLAGS) -c crctable.c
|
||||
randtable.o: randtable.c
|
||||
$(CC) $(CFLAGS) -c randtable.c
|
||||
compress.o: compress.c
|
||||
$(CC) $(CFLAGS) -c compress.c
|
||||
decompress.o: decompress.c
|
||||
$(CC) $(CFLAGS) -c decompress.c
|
||||
bzlib.o: bzlib.c
|
||||
$(CC) $(CFLAGS) -c bzlib.c
|
||||
bzip2.o: bzip2.c
|
||||
$(CC) $(CFLAGS) -c bzip2.c
|
||||
bzip2recover.o: bzip2recover.c
|
||||
$(CC) $(CFLAGS) -c bzip2recover.c
|
||||
|
||||
tarfile:
|
||||
tar cvf interim.tar *.c *.h Makefile manual.texi manual.ps LICENSE bzip2.1 bzip2.1.preformatted bzip2.txt words1 words2 words3 sample1.ref sample2.ref sample1.bz2 sample2.bz2 *.html README CHANGES libbz2.def libbz2.dsp dlltest.dsp
|
||||
tar cvf interim.tar blocksort.c huffman.c crctable.c \
|
||||
randtable.c compress.c decompress.c bzlib.c bzip2.c \
|
||||
bzip2recover.c bzlib.h bzlib_private.h Makefile manual.texi \
|
||||
manual.ps LICENSE bzip2.1 bzip2.1.preformatted bzip2.txt \
|
||||
words1 words2 words3 sample1.ref sample2.ref sample3.ref \
|
||||
sample1.bz2 sample2.bz2 sample3.bz2 dlltest.c \
|
||||
*.html README CHANGES libbz2.def libbz2.dsp \
|
||||
dlltest.dsp makefile.msc Y2K_INFO
|
||||
|
||||
|
78
README
78
README
@ -1,48 +1,44 @@
|
||||
|
||||
|
||||
This is the README for bzip2, a block-sorting file compressor, version
|
||||
0.9.0. This version is fully compatible with the previous public
|
||||
release, bzip2-0.1pl2.
|
||||
0.9.5d. This version is fully compatible with the previous public
|
||||
releases, bzip2-0.1pl2 and bzip2-0.9.0.
|
||||
|
||||
bzip2-0.9.0 is distributed under a BSD-style license. For details,
|
||||
bzip2-0.9.5 is distributed under a BSD-style license. For details,
|
||||
see the file LICENSE.
|
||||
|
||||
Complete documentation is available in Postscript form (manual.ps)
|
||||
or html (manual_toc.html). A plain-text version of the manual page is
|
||||
available as bzip2.txt.
|
||||
Complete documentation is available in Postscript form (manual.ps) or
|
||||
html (manual_toc.html). A plain-text version of the manual page is
|
||||
available as bzip2.txt. A statement about Y2K issues is now included
|
||||
in the file Y2K_INFO.
|
||||
|
||||
|
||||
HOW TO BUILD -- UNIX
|
||||
|
||||
Type `make'.
|
||||
Type `make'. This builds the library libbz2.a and then the
|
||||
programs bzip2 and bzip2recover. Six self-tests are run.
|
||||
If the self-tests complete ok, carry on to installation:
|
||||
|
||||
This creates binaries "bzip2" and "bzip2recover".
|
||||
|
||||
It also runs four compress-decompress tests to make sure things are
|
||||
working properly. If all goes well, you should be up & running.
|
||||
Please be sure to read the output from `make' just to be sure that the
|
||||
tests went ok.
|
||||
|
||||
To install bzip2 properly:
|
||||
|
||||
* Copy the binaries "bzip2" and "bzip2recover" to a publicly visible
|
||||
place, possibly /usr/bin or /usr/local/bin.
|
||||
|
||||
* In that directory, make "bunzip2" and "bzcat" be symbolic links
|
||||
to "bzip2".
|
||||
|
||||
* Copy the manual page, bzip2.1, to the relevant place.
|
||||
Probably the right place is /usr/man/man1/.
|
||||
|
||||
If you want to program with the library, you'll need to copy libbz2.a
|
||||
and bzlib.h to /usr/lib and /usr/include respectively.
|
||||
To install in /usr/bin, /usr/lib, /usr/man and /usr/include, type
|
||||
make install
|
||||
To install somewhere else, eg, /xxx/yyy/{bin,lib,man,include}, type
|
||||
make install PREFIX=/xxx/yyy
|
||||
If you are (justifiably) paranoid and want to see what 'make install'
|
||||
is going to do, you can first do
|
||||
make -n install or
|
||||
make -n install PREFIX=/xxx/yyy respectively.
|
||||
The -n instructs make to show the commands it would execute, but
|
||||
not actually execute them.
|
||||
|
||||
|
||||
HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc.
|
||||
|
||||
It's difficult for me to support compilation on all these platforms.
|
||||
My approach is to collect binaries for these platforms, and put them
|
||||
on my web page (http://www.muraroa.demon.co.uk). Look there.
|
||||
on my web page (http://www.muraroa.demon.co.uk). Look there. However
|
||||
(FWIW), bzip2-0.9.5 is very standard ANSI C and should compile
|
||||
unmodified with MS Visual C. For Win32, there is one important
|
||||
caveat: in bzip2.c, you must set BZ_UNIX to 0 and BZ_LCCWIN32 to 1
|
||||
before building.
|
||||
|
||||
|
||||
VALIDATION
|
||||
@ -112,26 +108,32 @@ WHAT'S NEW IN 0.9.0 (as compared to 0.1pl2) ?
|
||||
* Much more documentation, i.e., a proper user manual
|
||||
* Hopefully, improved portability (at least of the library)
|
||||
|
||||
WHAT'S NEW IN 0.9.5 ?
|
||||
|
||||
* Compression speed is much less sensitive to the input
|
||||
data than in previous versions. Specifically, the very
|
||||
slow performance caused by repetitive data is fixed.
|
||||
* Many small improvements in file and flag handling.
|
||||
* A Y2K statement.
|
||||
|
||||
I hope you find bzip2 useful. Feel free to contact me at
|
||||
jseward@acm.org
|
||||
if you have any suggestions or queries. Many people mailed me with
|
||||
comments, suggestions and patches after the releases of bzip-0.15,
|
||||
bzip-0.21 and bzip2-0.1pl2, and the changes in bzip2 are largely a
|
||||
result of this feedback. I thank you for your comments.
|
||||
bzip-0.21, bzip2-0.1pl2 and bzip2-0.9.0, and the changes in bzip2 are
|
||||
largely a result of this feedback. I thank you for your comments.
|
||||
|
||||
At least for the time being, bzip2's "home" is
|
||||
At least for the time being, bzip2's "home" is (or can be reached via)
|
||||
http://www.muraroa.demon.co.uk.
|
||||
|
||||
Julian Seward
|
||||
jseward@acm.org
|
||||
|
||||
Manchester, UK
|
||||
18 July 1996 (version 0.15)
|
||||
Cambridge, UK
|
||||
18 July 1996 (version 0.15)
|
||||
25 August 1996 (version 0.21)
|
||||
|
||||
Guildford, Surrey, UK
|
||||
7 August 1997 (bzip2, version 0.1)
|
||||
7 August 1997 (bzip2, version 0.1)
|
||||
29 August 1997 (bzip2, version 0.1pl2)
|
||||
23 August 1998 (bzip2, version 0.9.0)
|
||||
|
||||
8 June 1999 (bzip2, version 0.9.5)
|
||||
4 Sept 1999 (bzip2, version 0.9.5d)
|
||||
|
34
Y2K_INFO
Normal file
34
Y2K_INFO
Normal file
@ -0,0 +1,34 @@
|
||||
|
||||
Y2K status of bzip2 and libbzip2, versions 0.1, 0.9.0 and 0.9.5
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Informally speaking:
|
||||
bzip2 is a compression program built on top of libbzip2,
|
||||
a library which does the real work of compression and
|
||||
decompression. As far as I am aware, libbzip2 does not have
|
||||
any date-related code at all.
|
||||
|
||||
bzip2 itself copies dates from source to destination files
|
||||
when compressing or decompressing, using the 'stat' and 'utime'
|
||||
UNIX system calls. It doesn't examine, manipulate or store the
|
||||
dates in any way. So as far as I can see, there shouldn't be any
|
||||
problem with bzip2 providing 'stat' and 'utime' work correctly
|
||||
on your system.
|
||||
|
||||
On non-unix platforms (those for which BZ_UNIX in bzip2.c is
|
||||
not set to 1), bzip2 doesn't even do the date copying.
|
||||
|
||||
Overall, informally speaking, I don't think bzip2 or libbzip2
|
||||
have a Y2K problem.
|
||||
|
||||
Formally speaking:
|
||||
I am not prepared to offer you any assurance whatsoever
|
||||
regarding Y2K issues in my software. You alone assume the
|
||||
entire risk of using the software. The disclaimer of liability
|
||||
in the LICENSE file in the bzip2 source distribution continues
|
||||
to apply on this issue as with every other issue pertaining
|
||||
to the software.
|
||||
|
||||
Julian Seward
|
||||
Cambridge, UK
|
||||
25 August 1999
|
1248
blocksort.c
1248
blocksort.c
File diff suppressed because it is too large
Load Diff
564
bzip2.1
564
bzip2.1
@ -1,7 +1,7 @@
|
||||
.PU
|
||||
.TH bzip2 1
|
||||
.SH NAME
|
||||
bzip2, bunzip2 \- a block-sorting file compressor, v0.9.0
|
||||
bzip2, bunzip2 \- a block-sorting file compressor, v0.9.5
|
||||
.br
|
||||
bzcat \- decompresses files to stdout
|
||||
.br
|
||||
@ -10,7 +10,7 @@ bzip2recover \- recovers data from damaged bzip2 files
|
||||
.SH SYNOPSIS
|
||||
.ll +8
|
||||
.B bzip2
|
||||
.RB [ " \-cdfkstvzVL123456789 " ]
|
||||
.RB [ " \-cdfkqstvzVL123456789 " ]
|
||||
[
|
||||
.I "filenames \&..."
|
||||
]
|
||||
@ -33,195 +33,154 @@ bzip2recover \- recovers data from damaged bzip2 files
|
||||
|
||||
.SH DESCRIPTION
|
||||
.I bzip2
|
||||
compresses files using the Burrows-Wheeler block-sorting
|
||||
text compression algorithm, and Huffman coding.
|
||||
Compression is generally considerably
|
||||
better than that
|
||||
achieved by more conventional LZ77/LZ78-based compressors,
|
||||
and approaches the performance of the PPM family of statistical
|
||||
compressors.
|
||||
compresses files using the Burrows-Wheeler block sorting
|
||||
text compression algorithm, and Huffman coding. Compression is
|
||||
generally considerably better than that achieved by more conventional
|
||||
LZ77/LZ78-based compressors, and approaches the performance of the PPM
|
||||
family of statistical compressors.
|
||||
|
||||
The command-line options are deliberately very similar to
|
||||
those of
|
||||
.I GNU Gzip,
|
||||
.I GNU gzip,
|
||||
but they are not identical.
|
||||
|
||||
.I bzip2
|
||||
expects a list of file names to accompany the command-line flags.
|
||||
Each file is replaced by a compressed version of itself,
|
||||
with the name "original_name.bz2".
|
||||
Each compressed file has the same modification date and permissions
|
||||
as the corresponding original, so that these properties can be
|
||||
correctly restored at decompression time. File name handling is
|
||||
naive in the sense that there is no mechanism for preserving
|
||||
original file names, permissions and dates in filesystems
|
||||
which lack these concepts, or have serious file name length
|
||||
restrictions, such as MS-DOS.
|
||||
expects a list of file names to accompany the
|
||||
command-line flags. Each file is replaced by a compressed version of
|
||||
itself, with the name "original_name.bz2".
|
||||
Each compressed file
|
||||
has the same modification date, permissions, and, when possible,
|
||||
ownership as the corresponding original, so that these properties can
|
||||
be correctly restored at decompression time. File name handling is
|
||||
naive in the sense that there is no mechanism for preserving original
|
||||
file names, permissions, ownerships or dates in filesystems which lack
|
||||
these concepts, or have serious file name length restrictions, such as
|
||||
MS-DOS.
|
||||
|
||||
.I bzip2
|
||||
and
|
||||
.I bunzip2
|
||||
will by default not overwrite existing files;
|
||||
if you want this to happen, specify the \-f flag.
|
||||
will by default not overwrite existing
|
||||
files. If you want this to happen, specify the \-f flag.
|
||||
|
||||
If no file names are specified,
|
||||
.I bzip2
|
||||
compresses from standard input to standard output.
|
||||
In this case,
|
||||
compresses from standard
|
||||
input to standard output. In this case,
|
||||
.I bzip2
|
||||
will decline to write compressed output to a terminal, as
|
||||
this would be entirely incomprehensible and therefore pointless.
|
||||
will decline to
|
||||
write compressed output to a terminal, as this would be entirely
|
||||
incomprehensible and therefore pointless.
|
||||
|
||||
.I bunzip2
|
||||
(or
|
||||
.I bzip2 \-d
|
||||
) decompresses and restores all specified files whose names
|
||||
end in ".bz2".
|
||||
Files without this suffix are ignored.
|
||||
Again, supplying no filenames
|
||||
causes decompression from standard input to standard output.
|
||||
.I bzip2 \-d)
|
||||
decompresses all
|
||||
specified files. Files which were not created by
|
||||
.I bzip2
|
||||
will be detected and ignored, and a warning issued.
|
||||
.I bzip2
|
||||
attempts to guess the filename for the decompressed file
|
||||
from that of the compressed file as follows:
|
||||
|
||||
filename.bz2 becomes filename
|
||||
filename.bz becomes filename
|
||||
filename.tbz2 becomes filename.tar
|
||||
filename.tbz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
If the file does not end in one of the recognised endings,
|
||||
.I .bz2,
|
||||
.I .bz,
|
||||
.I .tbz2
|
||||
or
|
||||
.I .tbz,
|
||||
.I bzip2
|
||||
complains that it cannot
|
||||
guess the name of the original file, and uses the original name
|
||||
with
|
||||
.I .out
|
||||
appended.
|
||||
|
||||
As with compression, supplying no
|
||||
filenames causes decompression from
|
||||
standard input to standard output.
|
||||
|
||||
.I bunzip2
|
||||
will correctly decompress a file which is the concatenation
|
||||
of two or more compressed files. The result is the concatenation
|
||||
of the corresponding uncompressed files. Integrity testing
|
||||
(\-t) of concatenated compressed files is also supported.
|
||||
will correctly decompress a file which is the
|
||||
concatenation of two or more compressed files. The result is the
|
||||
concatenation of the corresponding uncompressed files. Integrity
|
||||
testing (\-t)
|
||||
of concatenated
|
||||
compressed files is also supported.
|
||||
|
||||
You can also compress or decompress files to
|
||||
the standard output by giving the \-c flag.
|
||||
Multiple files may be compressed and decompressed like this.
|
||||
The resulting outputs are fed sequentially to stdout.
|
||||
Compression of multiple files in this manner generates
|
||||
a stream containing multiple compressed file representations.
|
||||
Such a stream can be decompressed correctly only by
|
||||
You can also compress or decompress files to the standard output by
|
||||
giving the \-c flag. Multiple files may be compressed and
|
||||
decompressed like this. The resulting outputs are fed sequentially to
|
||||
stdout. Compression of multiple files
|
||||
in this manner generates a stream
|
||||
containing multiple compressed file representations. Such a stream
|
||||
can be decompressed correctly only by
|
||||
.I bzip2
|
||||
version 0.9.0 or later. Earlier versions of
|
||||
version 0.9.0 or
|
||||
later. Earlier versions of
|
||||
.I bzip2
|
||||
will stop after decompressing the first file in the stream.
|
||||
will stop after decompressing
|
||||
the first file in the stream.
|
||||
|
||||
.I bzcat
|
||||
(or
|
||||
.I bzip2 \-dc
|
||||
) decompresses all specified files to the standard output.
|
||||
.I bzip2 -dc)
|
||||
decompresses all specified files to
|
||||
the standard output.
|
||||
|
||||
Compression is always performed, even if the compressed file is
|
||||
slightly larger than the original. Files of less than about
|
||||
one hundred bytes tend to get larger, since the compression
|
||||
mechanism has a constant overhead in the region of 50 bytes.
|
||||
Random data (including the output of most file compressors)
|
||||
is coded at about 8.05 bits per byte, giving an expansion of
|
||||
around 0.5%.
|
||||
.I bzip2
|
||||
will read arguments from the environment variables
|
||||
.I BZIP2
|
||||
and
|
||||
.I BZIP,
|
||||
in that order, and will process them
|
||||
before any arguments read from the command line. This gives a
|
||||
convenient way to supply default arguments.
|
||||
|
||||
Compression is always performed, even if the compressed
|
||||
file is slightly
|
||||
larger than the original. Files of less than about one hundred bytes
|
||||
tend to get larger, since the compression mechanism has a constant
|
||||
overhead in the region of 50 bytes. Random data (including the output
|
||||
of most file compressors) is coded at about 8.05 bits per byte, giving
|
||||
an expansion of around 0.5%.
|
||||
|
||||
As a self-check for your protection,
|
||||
.I
|
||||
bzip2
|
||||
uses 32-bit CRCs to
|
||||
make sure that the decompressed version of a file is identical to the
|
||||
original. This guards against corruption of the compressed data, and
|
||||
against undetected bugs in
|
||||
.I bzip2
|
||||
uses 32-bit CRCs to make sure that the decompressed
|
||||
version of a file is identical to the original.
|
||||
This guards against corruption of the compressed data,
|
||||
and against undetected bugs in
|
||||
.I bzip2
|
||||
(hopefully very unlikely).
|
||||
The chances of data corruption going undetected are
|
||||
microscopic, about one chance in four billion
|
||||
for each file processed. Be aware, though, that the check
|
||||
occurs upon decompression, so it can only tell you that
|
||||
something is wrong. It can't help you recover the
|
||||
original uncompressed data.
|
||||
You can use
|
||||
(hopefully very unlikely). The
|
||||
chances of data corruption going undetected are microscopic, about one
|
||||
chance in four billion for each file processed. Be aware, though, that
|
||||
the check occurs upon decompression, so it can only tell you that
|
||||
something is wrong. It can't help you
|
||||
recover the original uncompressed
|
||||
data. You can use
|
||||
.I bzip2recover
|
||||
to try to recover data from damaged files.
|
||||
to try to recover data from
|
||||
damaged files.
|
||||
|
||||
Return values:
|
||||
0 for a normal exit,
|
||||
1 for environmental
|
||||
problems (file not found, invalid flags, I/O errors, &c),
|
||||
2 to indicate a corrupt compressed file,
|
||||
3 for an internal consistency error (eg, bug) which caused
|
||||
Return values: 0 for a normal exit, 1 for environmental problems (file
|
||||
not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
|
||||
compressed file, 3 for an internal consistency error (eg, bug) which
|
||||
caused
|
||||
.I bzip2
|
||||
to panic.
|
||||
|
||||
.SH MEMORY MANAGEMENT
|
||||
.I Bzip2
|
||||
compresses large files in blocks. The block size affects both the
|
||||
compression ratio achieved, and the amount of memory needed both for
|
||||
compression and decompression. The flags \-1 through \-9
|
||||
specify the block size to be 100,000 bytes through 900,000 bytes
|
||||
(the default) respectively. At decompression-time, the block size used for
|
||||
compression is read from the header of the compressed file, and
|
||||
.I bunzip2
|
||||
then allocates itself just enough memory to decompress the file.
|
||||
Since block sizes are stored in compressed files, it follows that the flags
|
||||
\-1 to \-9
|
||||
are irrelevant to and so ignored during decompression.
|
||||
Compression and decompression requirements, in bytes, can be estimated as:
|
||||
|
||||
Compression: 400k + ( 7 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
.br
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal returns; most
|
||||
of the
|
||||
compression comes from the first two or three hundred k of block size,
|
||||
a fact worth bearing in mind when using
|
||||
.I bzip2
|
||||
on small machines. It is also important to appreciate that the
|
||||
decompression memory requirement is set at compression-time by the
|
||||
choice of block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
.I bunzip2
|
||||
will require about 3700 kbytes to decompress.
|
||||
To support decompression of any file on a 4 megabyte machine,
|
||||
.I bunzip2
|
||||
has an option to decompress using approximately half this
|
||||
amount of memory, about 2300 kbytes. Decompression speed is
|
||||
also halved, so you should use this option only where necessary.
|
||||
The relevant flag is \-s.
|
||||
|
||||
In general, try and use the largest block size
|
||||
memory constraints allow, since that maximises the compression
|
||||
achieved. Compression and decompression
|
||||
speed are virtually unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a single
|
||||
block -- that means most files you'd encounter using a large
|
||||
block size. The amount of real memory touched is proportional
|
||||
to the size of the file, since the file is smaller than a block.
|
||||
For example, compressing a file 20,000 bytes long with the flag
|
||||
\-9
|
||||
will cause the compressor to allocate around
|
||||
6700k of memory, but only touch 400k + 20000 * 7 = 540
|
||||
kbytes of it. Similarly, the decompressor will allocate 3700k but
|
||||
only touch 100k + 20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage for
|
||||
different block sizes. Also recorded is the total compressed
|
||||
size for 14 files of the Calgary Text Compression Corpus
|
||||
totalling 3,141,622 bytes. This column gives some feel for how
|
||||
compression varies with block size. These figures tend to understate
|
||||
the advantage of larger block sizes for larger files, since the
|
||||
Corpus is dominated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1100k 500k 350k 914704
|
||||
-2 1800k 900k 600k 877703
|
||||
-3 2500k 1300k 850k 860338
|
||||
-4 3200k 1700k 1100k 846899
|
||||
-5 3900k 2100k 1350k 845160
|
||||
-6 4600k 2500k 1600k 838626
|
||||
-7 5300k 2900k 1850k 834096
|
||||
-8 6000k 3300k 2100k 828642
|
||||
-9 6700k 3700k 2350k 828642
|
||||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-c --stdout
|
||||
Compress or decompress to standard output. \-c will decompress
|
||||
multiple files to stdout, but will only compress a single file to
|
||||
stdout.
|
||||
Compress or decompress to standard output.
|
||||
.TP
|
||||
.B \-d --decompress
|
||||
Force decompression.
|
||||
@ -229,15 +188,16 @@ Force decompression.
|
||||
.I bunzip2
|
||||
and
|
||||
.I bzcat
|
||||
are really the same program, and the decision about what actions
|
||||
to take is done on the basis of which name is
|
||||
used. This flag overrides that mechanism, and forces
|
||||
are
|
||||
really the same program, and the decision about what actions to take is
|
||||
done on the basis of which name is used. This flag overrides that
|
||||
mechanism, and forces
|
||||
.I bzip2
|
||||
to decompress.
|
||||
.TP
|
||||
.B \-z --compress
|
||||
The complement to \-d: forces compression, regardless of the invocation
|
||||
name.
|
||||
The complement to \-d: forces compression, regardless of the
|
||||
invocation name.
|
||||
.TP
|
||||
.B \-t --test
|
||||
Check integrity of the specified file(s), but don't decompress them.
|
||||
@ -246,24 +206,30 @@ This really performs a trial decompression and throws away the result.
|
||||
.B \-f --force
|
||||
Force overwrite of output files. Normally,
|
||||
.I bzip2
|
||||
will not overwrite existing output files.
|
||||
will not overwrite
|
||||
existing output files. Also forces
|
||||
.I bzip2
|
||||
to break hard links
|
||||
to files, which it otherwise wouldn't do.
|
||||
.TP
|
||||
.B \-k --keep
|
||||
Keep (don't delete) input files during compression or decompression.
|
||||
Keep (don't delete) input files during compression
|
||||
or decompression.
|
||||
.TP
|
||||
.B \-s --small
|
||||
Reduce memory usage, for compression, decompression and
|
||||
testing.
|
||||
Files are decompressed and tested using a modified algorithm which only
|
||||
Reduce memory usage, for compression, decompression and testing. Files
|
||||
are decompressed and tested using a modified algorithm which only
|
||||
requires 2.5 bytes per block byte. This means any file can be
|
||||
decompressed in 2300k of memory, albeit at about half the normal
|
||||
speed.
|
||||
decompressed in 2300k of memory, albeit at about half the normal speed.
|
||||
|
||||
During compression, -s selects a block size of 200k, which limits
|
||||
memory use to around the same figure, at the expense of your
|
||||
compression ratio. In short, if your machine is low on memory
|
||||
(8 megabytes or less), use -s for everything. See
|
||||
MEMORY MANAGEMENT above.
|
||||
During compression, \-s selects a block size of 200k, which limits
|
||||
memory use to around the same figure, at the expense of your compression
|
||||
ratio. In short, if your machine is low on memory (8 megabytes or
|
||||
less), use \-s for everything. See MEMORY MANAGEMENT below.
|
||||
.TP
|
||||
.B \-q --quiet
|
||||
Suppress non-essential warning messages. Messages pertaining to
|
||||
I/O errors and other critical events will not be suppressed.
|
||||
.TP
|
||||
.B \-v --verbose
|
||||
Verbose mode -- show the compression ratio for each file processed.
|
||||
@ -274,121 +240,177 @@ information which is primarily of interest for diagnostic purposes.
|
||||
Display the software version, license terms and conditions.
|
||||
.TP
|
||||
.B \-1 to \-9
|
||||
Set the block size to 100 k, 200 k .. 900 k when
|
||||
compressing. Has no effect when decompressing.
|
||||
See MEMORY MANAGEMENT above.
|
||||
Set the block size to 100 k, 200 k .. 900 k when compressing. Has no
|
||||
effect when decompressing. See MEMORY MANAGEMENT below.
|
||||
.TP
|
||||
.B \--repetitive-fast
|
||||
.I bzip2
|
||||
injects some small pseudo-random variations
|
||||
into very repetitive blocks to limit
|
||||
worst-case performance during compression.
|
||||
If sorting runs into difficulties, the block
|
||||
is randomised, and sorting is restarted.
|
||||
Very roughly,
|
||||
.I bzip2
|
||||
persists for three times as long as a well-behaved input
|
||||
would take before resorting to randomisation.
|
||||
This flag makes it give up much sooner.
|
||||
.B \--
|
||||
Treats all subsequent arguments as file names, even if they start
|
||||
with a dash. This is so you can handle files with names beginning
|
||||
with a dash, for example: bzip2 \-- \-myfilename.
|
||||
.TP
|
||||
.B \--repetitive-fast --repetitive-best
|
||||
These flags are redundant in versions 0.9.5 and above. They provided
|
||||
some coarse control over the behaviour of the sorting algorithm in
|
||||
earlier versions, which was sometimes useful. 0.9.5 and above have an
|
||||
improved algorithm which renders these flags irrelevant.
|
||||
|
||||
.TP
|
||||
.B \--repetitive-best
|
||||
Opposite of \--repetitive-fast; try a lot harder before
|
||||
resorting to randomisation.
|
||||
.SH MEMORY MANAGEMENT
|
||||
.I bzip2
|
||||
compresses large files in blocks. The block size affects
|
||||
both the compression ratio achieved, and the amount of memory needed for
|
||||
compression and decompression. The flags \-1 through \-9
|
||||
specify the block size to be 100,000 bytes through 900,000 bytes (the
|
||||
default) respectively. At decompression time, the block size used for
|
||||
compression is read from the header of the compressed file, and
|
||||
.I bunzip2
|
||||
then allocates itself just enough memory to decompress
|
||||
the file. Since block sizes are stored in compressed files, it follows
|
||||
that the flags \-1 to \-9 are irrelevant to and so ignored
|
||||
during decompression.
|
||||
|
||||
Compression and decompression requirements,
|
||||
in bytes, can be estimated as:
|
||||
|
||||
Compression: 400k + ( 8 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal returns. Most of
|
||||
the compression comes from the first two or three hundred k of block
|
||||
size, a fact worth bearing in mind when using
|
||||
.I bzip2
|
||||
on small machines.
|
||||
It is also important to appreciate that the decompression memory
|
||||
requirement is set at compression time by the choice of block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
.I bunzip2
|
||||
will require about 3700 kbytes to decompress. To support decompression
|
||||
of any file on a 4 megabyte machine,
|
||||
.I bunzip2
|
||||
has an option to
|
||||
decompress using approximately half this amount of memory, about 2300
|
||||
kbytes. Decompression speed is also halved, so you should use this
|
||||
option only where necessary. The relevant flag is \-s.
|
||||
|
||||
In general, try to use the largest block size memory constraints allow,
|
||||
since that maximises the compression achieved. Compression and
|
||||
decompression speed are virtually unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a single block
|
||||
-- that means most files you'd encounter using a large block size. The
|
||||
amount of real memory touched is proportional to the size of the file,
|
||||
since the file is smaller than a block. For example, compressing a file
|
||||
20,000 bytes long with the flag \-9 will cause the compressor to
|
||||
allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
|
||||
kbytes of it. Similarly, the decompressor will allocate 3700k but only
|
||||
touch 100k + 20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage for different
|
||||
block sizes. Also recorded is the total compressed size for 14 files of
|
||||
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
|
||||
column gives some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger block sizes for
|
||||
larger files, since the Corpus is dominated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1200k 500k 350k 914704
|
||||
-2 2000k 900k 600k 877703
|
||||
-3 2800k 1300k 850k 860338
|
||||
-4 3600k 1700k 1100k 846899
|
||||
-5 4400k 2100k 1350k 845160
|
||||
-6 5200k 2500k 1600k 838626
|
||||
-7 6100k 2900k 1850k 834096
|
||||
-8 6800k 3300k 2100k 828642
|
||||
-9 7600k 3700k 2350k 828642
|
||||
|
||||
.SH RECOVERING DATA FROM DAMAGED FILES
|
||||
.I bzip2
|
||||
compresses files in blocks, usually 900kbytes long.
|
||||
Each block is handled independently. If a media or
|
||||
transmission error causes a multi-block .bz2
|
||||
file to become damaged,
|
||||
it may be possible to recover data from the undamaged blocks
|
||||
in the file.
|
||||
compresses files in blocks, usually 900kbytes long. Each
|
||||
block is handled independently. If a media or transmission error causes
|
||||
a multi-block .bz2
|
||||
file to become damaged, it may be possible to
|
||||
recover data from the undamaged blocks in the file.
|
||||
|
||||
The compressed representation of each block is delimited by
|
||||
a 48-bit pattern, which makes it possible to find the block
|
||||
boundaries with reasonable certainty. Each block also carries
|
||||
its own 32-bit CRC, so damaged blocks can be
|
||||
distinguished from undamaged ones.
|
||||
The compressed representation of each block is delimited by a 48-bit
|
||||
pattern, which makes it possible to find the block boundaries with
|
||||
reasonable certainty. Each block also carries its own 32-bit CRC, so
|
||||
damaged blocks can be distinguished from undamaged ones.
|
||||
|
||||
.I bzip2recover
|
||||
is a simple program whose purpose is to search for
|
||||
blocks in .bz2 files, and write each block out into
|
||||
its own .bz2 file. You can then use
|
||||
.I bzip2 -t
|
||||
to test the integrity of the resulting files,
|
||||
and decompress those which are undamaged.
|
||||
blocks in .bz2 files, and write each block out into its own .bz2
|
||||
file. You can then use
|
||||
.I bzip2
|
||||
\-t
|
||||
to test the
|
||||
integrity of the resulting files, and decompress those which are
|
||||
undamaged.
|
||||
|
||||
.I bzip2recover
|
||||
takes a single argument, the name of the damaged file,
|
||||
and writes a number of files "rec0001file.bz2", "rec0002file.bz2",
|
||||
etc, containing the extracted blocks. The output filenames
|
||||
are designed so that the use of wildcards in subsequent processing
|
||||
-- for example, "bzip2 -dc rec*file.bz2 > recovered_data" --
|
||||
lists the files in the "right" order.
|
||||
and writes a number of files "rec0001file.bz2",
|
||||
"rec0002file.bz2", etc, containing the extracted blocks.
|
||||
The output filenames are designed so that the use of
|
||||
wildcards in subsequent processing -- for example,
|
||||
"bzip2 -dc rec*file.bz2 > recovered_data" -- lists the files in
|
||||
the correct order.
|
||||
|
||||
.I bzip2recover
|
||||
should be of most use dealing with large .bz2 files, as
|
||||
these will contain many blocks. It is clearly futile to
|
||||
use it on damaged single-block files, since a damaged
|
||||
block cannot be recovered. If you wish to minimise
|
||||
any potential data loss through media or transmission
|
||||
errors, you might consider compressing with a smaller
|
||||
should be of most use when dealing with large .bz2
|
||||
files, as these will contain many blocks. It is clearly
|
||||
futile to use it on damaged single-block files, since a
|
||||
damaged block cannot be recovered. If you wish to minimise
|
||||
any potential data loss through media or transmission errors,
|
||||
you might consider compressing with a smaller
|
||||
block size.
|
||||
|
||||
.SH PERFORMANCE NOTES
|
||||
The sorting phase of compression gathers together similar strings
|
||||
in the file. Because of this, files containing very long
|
||||
runs of repeated symbols, like "aabaabaabaab ..." (repeated
|
||||
several hundred times) may compress extraordinarily slowly.
|
||||
You can use the
|
||||
\-vvvvv
|
||||
option to monitor progress in great detail, if you want.
|
||||
Decompression speed is unaffected.
|
||||
The sorting phase of compression gathers together similar strings in the
|
||||
file. Because of this, files containing very long runs of repeated
|
||||
symbols, like "aabaabaabaab ..." (repeated several hundred times) may
|
||||
compress more slowly than normal. Versions 0.9.5 and above fare much
|
||||
better than previous versions in this respect. The ratio between
|
||||
worst-case and average-case compression time is in the region of 10:1.
|
||||
For previous versions, this figure was more like 100:1. You can use the
|
||||
\-vvvv option to monitor progress in great detail, if you want.
|
||||
|
||||
Such pathological cases
|
||||
seem rare in practice, appearing mostly in artificially-constructed
|
||||
test files, and in low-level disk images. It may be inadvisable to
|
||||
use
|
||||
.I bzip2
|
||||
to compress the latter.
|
||||
If you do get a file which causes severe slowness in compression,
|
||||
try making the block size as small as possible, with flag \-1.
|
||||
Decompression speed is unaffected by these phenomena.
|
||||
|
||||
.I bzip2
|
||||
usually allocates several megabytes of memory to operate in,
|
||||
and then charges all over it in a fairly random fashion. This
|
||||
means that performance, both for compressing and decompressing,
|
||||
is largely determined by the speed
|
||||
at which your machine can service cache misses.
|
||||
Because of this, small changes
|
||||
to the code to reduce the miss rate have been observed to give
|
||||
disproportionately large performance improvements.
|
||||
usually allocates several megabytes of memory to operate
|
||||
in, and then charges all over it in a fairly random fashion. This means
|
||||
that performance, both for compressing and decompressing, is largely
|
||||
determined by the speed at which your machine can service cache misses.
|
||||
Because of this, small changes to the code to reduce the miss rate have
|
||||
been observed to give disproportionately large performance improvements.
|
||||
I imagine
|
||||
.I bzip2
|
||||
will perform best on machines with very large caches.
|
||||
|
||||
.SH CAVEATS
|
||||
I/O error messages are not as helpful as they could be.
|
||||
.I Bzip2
|
||||
tries hard to detect I/O errors and exit cleanly, but the
|
||||
details of what the problem is sometimes seem rather misleading.
|
||||
.I bzip2
|
||||
tries hard to detect I/O errors and exit cleanly, but the details of
|
||||
what the problem is sometimes seem rather misleading.
|
||||
|
||||
This manual page pertains to version 0.9.0 of
|
||||
This manual page pertains to version 0.9.5 of
|
||||
.I bzip2.
|
||||
Compressed data created by this version is entirely forwards and
|
||||
backwards compatible with the previous public release, version 0.1pl2,
|
||||
but with the following exception: 0.9.0 can correctly decompress
|
||||
multiple concatenated compressed files. 0.1pl2 cannot do this; it
|
||||
will stop after decompressing just the first file in the stream.
|
||||
|
||||
Wildcard expansion for Windows 95 and NT
|
||||
is flaky.
|
||||
Compressed
|
||||
data created by this version is entirely forwards and backwards
|
||||
compatible with the previous public releases, versions 0.1pl2 and 0.9.0,
|
||||
but with the following exception: 0.9.0 and above can correctly
|
||||
decompress multiple concatenated compressed files. 0.1pl2 cannot do
|
||||
this; it will stop after decompressing just the first file in the
|
||||
stream.
|
||||
|
||||
.I bzip2recover
|
||||
uses 32-bit integers to represent bit positions in
|
||||
compressed files, so it cannot handle compressed files
|
||||
more than 512 megabytes long. This could easily be fixed.
|
||||
compressed files, so it cannot handle compressed files more than 512
|
||||
megabytes long. This could easily be fixed.
|
||||
|
||||
.SH AUTHOR
|
||||
Julian Seward, jseward@acm.org.
|
||||
@ -397,23 +419,19 @@ http://www.muraroa.demon.co.uk
|
||||
|
||||
The ideas embodied in
|
||||
.I bzip2
|
||||
are due to (at least) the following people:
|
||||
Michael Burrows and David Wheeler (for the block sorting
|
||||
transformation), David Wheeler (again, for the Huffman coder),
|
||||
Peter Fenwick (for the structured coding model in the original
|
||||
are due to (at least) the following
|
||||
people: Michael Burrows and David Wheeler (for the block sorting
|
||||
transformation), David Wheeler (again, for the Huffman coder), Peter
|
||||
Fenwick (for the structured coding model in the original
|
||||
.I bzip,
|
||||
and many refinements),
|
||||
and
|
||||
Alistair Moffat, Radford Neal and Ian Witten (for the arithmetic
|
||||
coder in the original
|
||||
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
|
||||
(for the arithmetic coder in the original
|
||||
.I bzip).
|
||||
I am much indebted for their help, support and advice.
|
||||
See the manual in the source distribution for pointers to
|
||||
sources of documentation.
|
||||
Christian von Roques encouraged me to look for faster
|
||||
sorting algorithms, so as to speed up compression.
|
||||
Bela Lubkin encouraged me to improve the worst-case
|
||||
compression performance.
|
||||
Many people sent patches, helped with portability problems,
|
||||
lent machines, gave advice and were generally helpful.
|
||||
|
||||
I am much
|
||||
indebted for their help, support and advice. See the manual in the
|
||||
source distribution for pointers to sources of documentation. Christian
|
||||
von Roques encouraged me to look for faster sorting algorithms, so as to
|
||||
speed up compression. Bela Lubkin encouraged me to improve the
|
||||
worst-case compression performance. Many people sent patches, helped
|
||||
with portability problems, lent machines, gave advice and were generally
|
||||
helpful.
|
||||
|
@ -1,24 +1,20 @@
|
||||
|
||||
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
NNAAMMEE
|
||||
bzip2, bunzip2 - a block-sorting file compressor, v0.9.0
|
||||
bzip2, bunzip2 - a block-sorting file compressor, v0.9.5
|
||||
bzcat - decompresses files to stdout
|
||||
bzip2recover - recovers data from damaged bzip2 files
|
||||
|
||||
|
||||
SSYYNNOOPPSSIISS
|
||||
bbzziipp22 [ --ccddffkkssttvvzzVVLL112233445566778899 ] [ _f_i_l_e_n_a_m_e_s _._._. ]
|
||||
bbzziipp22 [ --ccddffkkqqssttvvzzVVLL112233445566778899 ] [ _f_i_l_e_n_a_m_e_s _._._. ]
|
||||
bbuunnzziipp22 [ --ffkkvvssVVLL ] [ _f_i_l_e_n_a_m_e_s _._._. ]
|
||||
bbzzccaatt [ --ss ] [ _f_i_l_e_n_a_m_e_s _._._. ]
|
||||
bbzziipp22rreeccoovveerr _f_i_l_e_n_a_m_e
|
||||
|
||||
|
||||
DDEESSCCRRIIPPTTIIOONN
|
||||
_b_z_i_p_2 compresses files using the Burrows-Wheeler block-
|
||||
_b_z_i_p_2 compresses files using the Burrows-Wheeler block
|
||||
sorting text compression algorithm, and Huffman coding.
|
||||
Compression is generally considerably better than that
|
||||
achieved by more conventional LZ77/LZ78-based compressors,
|
||||
@ -26,22 +22,22 @@ DDEESSCCRRIIPPTTIIOONN
|
||||
tistical compressors.
|
||||
|
||||
The command-line options are deliberately very similar to
|
||||
those of _G_N_U _G_z_i_p_, but they are not identical.
|
||||
those of _G_N_U _g_z_i_p_, but they are not identical.
|
||||
|
||||
_b_z_i_p_2 expects a list of file names to accompany the com-
|
||||
mand-line flags. Each file is replaced by a compressed
|
||||
version of itself, with the name "original_name.bz2".
|
||||
Each compressed file has the same modification date and
|
||||
permissions as the corresponding original, so that these
|
||||
properties can be correctly restored at decompression
|
||||
time. File name handling is naive in the sense that there
|
||||
is no mechanism for preserving original file names, per-
|
||||
missions and dates in filesystems which lack these con-
|
||||
cepts, or have serious file name length restrictions, such
|
||||
as MS-DOS.
|
||||
Each compressed file has the same modification date, per-
|
||||
missions, and, when possible, ownership as the correspond-
|
||||
ing original, so that these properties can be correctly
|
||||
restored at decompression time. File name handling is
|
||||
naive in the sense that there is no mechanism for preserv-
|
||||
ing original file names, permissions, ownerships or dates
|
||||
in filesystems which lack these concepts, or have serious
|
||||
file name length restrictions, such as MS-DOS.
|
||||
|
||||
_b_z_i_p_2 and _b_u_n_z_i_p_2 will by default not overwrite existing
|
||||
files; if you want this to happen, specify the -f flag.
|
||||
files. If you want this to happen, specify the -f flag.
|
||||
|
||||
If no file names are specified, _b_z_i_p_2 compresses from
|
||||
standard input to standard output. In this case, _b_z_i_p_2
|
||||
@ -49,42 +45,50 @@ DDEESSCCRRIIPPTTIIOONN
|
||||
this would be entirely incomprehensible and therefore
|
||||
pointless.
|
||||
|
||||
_b_u_n_z_i_p_2 (or _b_z_i_p_2 _-_d ) decompresses and restores all spec-
|
||||
ified files whose names end in ".bz2". Files without this
|
||||
suffix are ignored. Again, supplying no filenames causes
|
||||
decompression from standard input to standard output.
|
||||
_b_u_n_z_i_p_2 (or _b_z_i_p_2 _-_d_) decompresses all specified files.
|
||||
Files which were not created by _b_z_i_p_2 will be detected and
|
||||
ignored, and a warning issued. _b_z_i_p_2 attempts to guess
|
||||
the filename for the decompressed file from that of the
|
||||
compressed file as follows:
|
||||
|
||||
filename.bz2 becomes filename
|
||||
filename.bz becomes filename
|
||||
filename.tbz2 becomes filename.tar
|
||||
filename.tbz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
If the file does not end in one of the recognised endings,
|
||||
_._b_z_2_, _._b_z_, _._t_b_z_2 or _._t_b_z_, _b_z_i_p_2 complains that it cannot
|
||||
guess the name of the original file, and uses the original
|
||||
name with _._o_u_t appended.
|
||||
|
||||
As with compression, supplying no filenames causes decom-
|
||||
pression from standard input to standard output.
|
||||
|
||||
_b_u_n_z_i_p_2 will correctly decompress a file which is the con-
|
||||
catenation of two or more compressed files. The result is
|
||||
the concatenation of the corresponding uncompressed files.
|
||||
Integrity testing (-t) of concatenated compressed files is
|
||||
|
||||
|
||||
|
||||
1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
also supported.
|
||||
|
||||
You can also compress or decompress files to the standard
|
||||
output by giving the -c flag. Multiple files may be com-
|
||||
You can also compress or decompress files to the standard
|
||||
output by giving the -c flag. Multiple files may be com-
|
||||
pressed and decompressed like this. The resulting outputs
|
||||
are fed sequentially to stdout. Compression of multiple
|
||||
files in this manner generates a stream containing multi-
|
||||
are fed sequentially to stdout. Compression of multiple
|
||||
files in this manner generates a stream containing multi-
|
||||
ple compressed file representations. Such a stream can be
|
||||
decompressed correctly only by _b_z_i_p_2 version 0.9.0 or
|
||||
later. Earlier versions of _b_z_i_p_2 will stop after decom-
|
||||
decompressed correctly only by _b_z_i_p_2 version 0.9.0 or
|
||||
later. Earlier versions of _b_z_i_p_2 will stop after decom-
|
||||
pressing the first file in the stream.
|
||||
|
||||
_b_z_c_a_t (or _b_z_i_p_2 _-_d_c ) decompresses all specified files to
|
||||
_b_z_c_a_t (or _b_z_i_p_2 _-_d_c_) decompresses all specified files to
|
||||
the standard output.
|
||||
|
||||
_b_z_i_p_2 will read arguments from the environment variables
|
||||
_B_Z_I_P_2 and _B_Z_I_P_, in that order, and will process them
|
||||
before any arguments read from the command line. This
|
||||
gives a convenient way to supply default arguments.
|
||||
|
||||
Compression is always performed, even if the compressed
|
||||
file is slightly larger than the original. Files of less
|
||||
than about one hundred bytes tend to get larger, since the
|
||||
@ -101,121 +105,19 @@ bzip2(1) bzip2(1)
|
||||
corruption going undetected is microscopic, about one
|
||||
chance in four billion for each file processed. Be aware,
|
||||
though, that the check occurs upon decompression, so it
|
||||
can only tell you that that something is wrong. It can't
|
||||
help you recover the original uncompressed data. You can
|
||||
use _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged
|
||||
files.
|
||||
can only tell you that something is wrong. It can't help
|
||||
you recover the original uncompressed data. You can use
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files.
|
||||
|
||||
Return values: 0 for a normal exit, 1 for environmental
|
||||
problems (file not found, invalid flags, I/O errors, &c),
|
||||
Return values: 0 for a normal exit, 1 for environmental
|
||||
problems (file not found, invalid flags, I/O errors, &c),
|
||||
2 to indicate a corrupt compressed file, 3 for an internal
|
||||
consistency error (eg, bug) which caused _b_z_i_p_2 to panic.
|
||||
|
||||
|
||||
MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
|
||||
_B_z_i_p_2 compresses large files in blocks. The block size
|
||||
affects both the compression ratio achieved, and the
|
||||
amount of memory needed both for compression and decom-
|
||||
pression. The flags -1 through -9 specify the block size
|
||||
to be 100,000 bytes through 900,000 bytes (the default)
|
||||
respectively. At decompression-time, the block size used
|
||||
for compression is read from the header of the compressed
|
||||
file, and _b_u_n_z_i_p_2 then allocates itself just enough memory
|
||||
to decompress the file. Since block sizes are stored in
|
||||
compressed files, it follows that the flags -1 to -9 are
|
||||
irrelevant to and so ignored during decompression.
|
||||
|
||||
|
||||
|
||||
2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
Compression and decompression requirements, in bytes, can
|
||||
be estimated as:
|
||||
|
||||
Compression: 400k + ( 7 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal
|
||||
returns; most of the compression comes from the first two
|
||||
or three hundred k of block size, a fact worth bearing in
|
||||
mind when using _b_z_i_p_2 on small machines. It is also
|
||||
important to appreciate that the decompression memory
|
||||
requirement is set at compression-time by the choice of
|
||||
block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
_b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To
|
||||
support decompression of any file on a 4 megabyte machine,
|
||||
_b_u_n_z_i_p_2 has an option to decompress using approximately
|
||||
half this amount of memory, about 2300 kbytes. Decompres-
|
||||
sion speed is also halved, so you should use this option
|
||||
only where necessary. The relevant flag is -s.
|
||||
|
||||
In general, try and use the largest block size memory con-
|
||||
straints allow, since that maximises the compression
|
||||
achieved. Compression and decompression speed are virtu-
|
||||
ally unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a
|
||||
single block -- that means most files you'd encounter
|
||||
using a large block size. The amount of real memory
|
||||
touched is proportional to the size of the file, since the
|
||||
file is smaller than a block. For example, compressing a
|
||||
file 20,000 bytes long with the flag -9 will cause the
|
||||
compressor to allocate around 6700k of memory, but only
|
||||
touch 400k + 20000 * 7 = 540 kbytes of it. Similarly, the
|
||||
decompressor will allocate 3700k but only touch 100k +
|
||||
20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage
|
||||
for different block sizes. Also recorded is the total
|
||||
compressed size for 14 files of the Calgary Text Compres-
|
||||
sion Corpus totalling 3,141,622 bytes. This column gives
|
||||
some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger
|
||||
block sizes for larger files, since the Corpus is domi-
|
||||
nated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1100k 500k 350k 914704
|
||||
-2 1800k 900k 600k 877703
|
||||
|
||||
|
||||
|
||||
3
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
-3 2500k 1300k 850k 860338
|
||||
-4 3200k 1700k 1100k 846899
|
||||
-5 3900k 2100k 1350k 845160
|
||||
-6 4600k 2500k 1600k 838626
|
||||
-7 5400k 2900k 1850k 834096
|
||||
-8 6000k 3300k 2100k 828642
|
||||
-9 6700k 3700k 2350k 828642
|
||||
|
||||
|
||||
OOPPTTIIOONNSS
|
||||
--cc ----ssttddoouutt
|
||||
Compress or decompress to standard output. -c will
|
||||
decompress multiple files to stdout, but will only
|
||||
compress a single file to stdout.
|
||||
Compress or decompress to standard output.
|
||||
|
||||
--dd ----ddeeccoommpprreessss
|
||||
Force decompression. _b_z_i_p_2_, _b_u_n_z_i_p_2 and _b_z_c_a_t are
|
||||
@ -235,7 +137,9 @@ OOPPTTIIOONNSS
|
||||
|
||||
--ff ----ffoorrccee
|
||||
Force overwrite of output files. Normally, _b_z_i_p_2
|
||||
will not overwrite existing output files.
|
||||
will not overwrite existing output files. Also
|
||||
forces _b_z_i_p_2 to break hard links to files, which it
|
||||
otherwise wouldn't do.
|
||||
|
||||
--kk ----kkeeeepp
|
||||
Keep (don't delete) input files during compression
|
||||
@ -254,19 +158,12 @@ OOPPTTIIOONNSS
|
||||
figure, at the expense of your compression ratio.
|
||||
In short, if your machine is low on memory (8
|
||||
megabytes or less), use -s for everything. See
|
||||
MEMORY MANAGEMENT above.
|
||||
|
||||
|
||||
|
||||
|
||||
4
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
MEMORY MANAGEMENT below.
|
||||
|
||||
--qq ----qquuiieett
|
||||
Suppress non-essential warning messages. Messages
|
||||
pertaining to I/O errors and other critical events
|
||||
will not be suppressed.
|
||||
|
||||
--vv ----vveerrbboossee
|
||||
Verbose mode -- show the compression ratio for each
|
||||
@ -281,22 +178,96 @@ bzip2(1) bzip2(1)
|
||||
--11 ttoo --99
|
||||
Set the block size to 100 k, 200 k .. 900 k when
|
||||
compressing. Has no effect when decompressing.
|
||||
See MEMORY MANAGEMENT above.
|
||||
See MEMORY MANAGEMENT below.
|
||||
|
||||
----rreeppeettiittiivvee--ffaasstt
|
||||
_b_z_i_p_2 injects some small pseudo-random variations
|
||||
into very repetitive blocks to limit worst-case
|
||||
performance during compression. If sorting runs
|
||||
into difficulties, the block is randomised, and
|
||||
sorting is restarted. Very roughly, _b_z_i_p_2 persists
|
||||
for three times as long as a well-behaved input
|
||||
would take before resorting to randomisation. This
|
||||
flag makes it give up much sooner.
|
||||
---- Treats all subsequent arguments as file names, even
|
||||
if they start with a dash. This is so you can han-
|
||||
dle files with names beginning with a dash, for
|
||||
example: bzip2 -- -myfilename.
|
||||
|
||||
----rreeppeettiittiivvee--ffaasstt ----rreeppeettiittiivvee--bbeesstt
|
||||
These flags are redundant in versions 0.9.5 and
|
||||
above. They provided some coarse control over the
|
||||
behaviour of the sorting algorithm in earlier ver-
|
||||
sions, which was sometimes useful. 0.9.5 and above
|
||||
have an improved algorithm which renders these
|
||||
flags irrelevant.
|
||||
|
||||
|
||||
----rreeppeettiittiivvee--bbeesstt
|
||||
Opposite of --repetitive-fast; try a lot harder
|
||||
before resorting to randomisation.
|
||||
MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
|
||||
_b_z_i_p_2 compresses large files in blocks. The block size
|
||||
affects both the compression ratio achieved, and the
|
||||
amount of memory needed for compression and decompression.
|
||||
The flags -1 through -9 specify the block size to be
|
||||
100,000 bytes through 900,000 bytes (the default) respec-
|
||||
tively. At decompression time, the block size used for
|
||||
compression is read from the header of the compressed
|
||||
file, and _b_u_n_z_i_p_2 then allocates itself just enough memory
|
||||
to decompress the file. Since block sizes are stored in
|
||||
compressed files, it follows that the flags -1 to -9 are
|
||||
irrelevant to and so ignored during decompression.
|
||||
|
||||
Compression and decompression requirements, in bytes, can
|
||||
be estimated as:
|
||||
|
||||
Compression: 400k + ( 8 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal
|
||||
returns. Most of the compression comes from the first two
|
||||
or three hundred k of block size, a fact worth bearing in
|
||||
mind when using _b_z_i_p_2 on small machines. It is also
|
||||
important to appreciate that the decompression memory
|
||||
requirement is set at compression time by the choice of
|
||||
block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
_b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To
|
||||
support decompression of any file on a 4 megabyte machine,
|
||||
_b_u_n_z_i_p_2 has an option to decompress using approximately
|
||||
half this amount of memory, about 2300 kbytes. Decompres-
|
||||
sion speed is also halved, so you should use this option
|
||||
only where necessary. The relevant flag is -s.
|
||||
|
||||
In general, try to use the largest block size memory con-
|
||||
straints allow, since that maximises the compression
|
||||
achieved. Compression and decompression speed are virtu-
|
||||
ally unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a
|
||||
single block -- that means most files you'd encounter
|
||||
using a large block size. The amount of real memory
|
||||
touched is proportional to the size of the file, since the
|
||||
file is smaller than a block. For example, compressing a
|
||||
file 20,000 bytes long with the flag -9 will cause the
|
||||
compressor to allocate around 7600k of memory, but only
|
||||
touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the
|
||||
decompressor will allocate 3700k but only touch 100k +
|
||||
20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage
|
||||
for different block sizes. Also recorded is the total
|
||||
compressed size for 14 files of the Calgary Text Compres-
|
||||
sion Corpus totalling 3,141,622 bytes. This column gives
|
||||
some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger
|
||||
block sizes for larger files, since the Corpus is domi-
|
||||
nated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1200k 500k 350k 914704
|
||||
-2 2000k 900k 600k 877703
|
||||
-3 2800k 1300k 850k 860338
|
||||
-4 3600k 1700k 1100k 846899
|
||||
-5 4400k 2100k 1350k 845160
|
||||
-6 5200k 2500k 1600k 838626
|
||||
-7 6100k 2900k 1850k 834096
|
||||
-8 6800k 3300k 2100k 828642
|
||||
-9 7600k 3700k 2350k 828642
|
||||
|
||||
|
||||
RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS
|
||||
@ -314,7 +285,7 @@ RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD F
|
||||
|
||||
bzip2recover is a simple program whose purpose is to
|
||||
search for blocks in .bz2 files, and write each block out
|
||||
into its own .bz2 file. You can then use _b_z_i_p_2 _-_t to test
|
||||
into its own .bz2 file. You can then use _b_z_i_p_2 -t to test
|
||||
the integrity of the resulting files, and decompress those
|
||||
which are undamaged.
|
||||
|
||||
@ -322,21 +293,9 @@ RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD F
|
||||
aged file, and writes a number of files "rec0001file.bz2",
|
||||
"rec0002file.bz2", etc, containing the extracted blocks.
|
||||
The output filenames are designed so that the use of
|
||||
|
||||
|
||||
|
||||
5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
wildcards in subsequent processing -- for example, "bzip2
|
||||
-dc rec*file.bz2 > recovered_data" -- lists the files in
|
||||
the "right" order.
|
||||
-dc rec*file.bz2 > recovered_data" -- lists the files in
|
||||
the correct order.
|
||||
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2
|
||||
files, as these will contain many blocks. It is clearly
|
||||
@ -351,17 +310,15 @@ PPEERRFFOORRMMAANNCCEE NNOOTTEESS
|
||||
The sorting phase of compression gathers together similar
|
||||
strings in the file. Because of this, files containing
|
||||
very long runs of repeated symbols, like "aabaabaabaab
|
||||
..." (repeated several hundred times) may compress
|
||||
extraordinarily slowly. You can use the -vvvvv option to
|
||||
monitor progress in great detail, if you want. Decompres-
|
||||
sion speed is unaffected.
|
||||
..." (repeated several hundred times) may compress more
|
||||
slowly than normal. Versions 0.9.5 and above fare much
|
||||
better than previous versions in this respect. The ratio
|
||||
between worst-case and average-case compression time is in
|
||||
the region of 10:1. For previous versions, this figure
|
||||
was more like 100:1. You can use the -vvvv option to mon-
|
||||
itor progress in great detail, if you want.
|
||||
|
||||
Such pathological cases seem rare in practice, appearing
|
||||
mostly in artificially-constructed test files, and in low-
|
||||
level disk images. It may be inadvisable to use _b_z_i_p_2 to
|
||||
compress the latter. If you do get a file which causes
|
||||
severe slowness in compression, try making the block size
|
||||
as small as possible, with flag -1.
|
||||
Decompression speed is unaffected by these phenomena.
|
||||
|
||||
_b_z_i_p_2 usually allocates several megabytes of memory to
|
||||
operate in, and then charges all over it in a fairly ran-
|
||||
@ -376,88 +333,43 @@ PPEERRFFOORRMMAANNCCEE NNOOTTEESS
|
||||
|
||||
CAVEATS
|
||||
I/O error messages are not as helpful as they could be.
|
||||
_B_z_i_p_2 tries hard to detect I/O errors and exit cleanly,
|
||||
_b_z_i_p_2 tries hard to detect I/O errors and exit cleanly,
|
||||
but the details of what the problem is sometimes seem
|
||||
rather misleading.
|
||||
|
||||
This manual page pertains to version 0.9.0 of _b_z_i_p_2_. Com-
|
||||
This manual page pertains to version 0.9.5 of bzip2. Com-
|
||||
pressed data created by this version is entirely forwards
|
||||
and backwards compatible with the previous public release,
|
||||
version 0.1pl2, but with the following exception: 0.9.0
|
||||
can correctly decompress multiple concatenated compressed
|
||||
files. 0.1pl2 cannot do this; it will stop after decom-
|
||||
pressing just the first file in the stream.
|
||||
and backwards compatible with the previous public
|
||||
releases, versions 0.1pl2 and 0.9.0, but with the follow-
|
||||
ing exception: 0.9.0 and above can correctly decompress
|
||||
multiple concatenated compressed files. 0.1pl2 cannot do
|
||||
this; it will stop after decompressing just the first file
|
||||
in the stream.
|
||||
|
||||
|
||||
|
||||
|
||||
6
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
Wildcard expansion for Windows 95 and NT is flaky.
|
||||
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r uses 32-bit integers to represent bit posi-
|
||||
tions in compressed files, so it cannot handle compressed
|
||||
files more than 512 megabytes long. This could easily be
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r uses 32-bit integers to represent bit posi-
|
||||
tions in compressed files, so it cannot handle compressed
|
||||
files more than 512 megabytes long. This could easily be
|
||||
fixed.
|
||||
|
||||
|
||||
AUTHOR
|
||||
Julian Seward, jseward@acm.org.
|
||||
|
||||
http://www.muraroa.demon.co.uk
|
||||
|
||||
The ideas embodied in _b_z_i_p_2 are due to (at least) the fol-
|
||||
lowing people: Michael Burrows and David Wheeler (for the
|
||||
block sorting transformation), David Wheeler (again, for
|
||||
lowing people: Michael Burrows and David Wheeler (for the
|
||||
block sorting transformation), David Wheeler (again, for
|
||||
the Huffman coder), Peter Fenwick (for the structured cod-
|
||||
ing model in the original _b_z_i_p_, and many refinements), and
|
||||
Alistair Moffat, Radford Neal and Ian Witten (for the
|
||||
Alistair Moffat, Radford Neal and Ian Witten (for the
|
||||
arithmetic coder in the original _b_z_i_p_)_. I am much
|
||||
indebted for their help, support and advice. See the man-
|
||||
ual in the source distribution for pointers to sources of
|
||||
ual in the source distribution for pointers to sources of
|
||||
documentation. Christian von Roques encouraged me to look
|
||||
for faster sorting algorithms, so as to speed up compres-
|
||||
for faster sorting algorithms, so as to speed up compres-
|
||||
sion. Bela Lubkin encouraged me to improve the worst-case
|
||||
compression performance. Many people sent patches, helped
|
||||
with portability problems, lent machines, gave advice and
|
||||
with portability problems, lent machines, gave advice and
|
||||
were generally helpful.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
7
|
||||
|
||||
|
||||
|
543
bzip2.c
543
bzip2.c
@ -7,7 +7,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -40,9 +40,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
@ -123,8 +123,10 @@
|
||||
--*/
|
||||
#define BZ_LCCWIN32 0
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32) && !defined(__CYGWIN32__)
|
||||
#undef BZ_LCCWIN32
|
||||
#define BZ_LCCWIN32 1
|
||||
#undef BZ_UNIX
|
||||
#define BZ_UNIX 0
|
||||
#endif
|
||||
|
||||
@ -139,6 +141,8 @@
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
#include "bzlib.h"
|
||||
|
||||
#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); }
|
||||
@ -166,6 +170,9 @@
|
||||
# define APPEND_FILESPEC(root, name) \
|
||||
root=snocString((root), (name))
|
||||
|
||||
# define APPEND_FLAG(root, name) \
|
||||
root=snocString((root), (name))
|
||||
|
||||
# define SET_BINARY_MODE(fd) /**/
|
||||
|
||||
# ifdef __GNUC__
|
||||
@ -173,6 +180,19 @@
|
||||
# else
|
||||
# define NORETURN /**/
|
||||
# endif
|
||||
# ifdef __DJGPP__
|
||||
# include <io.h>
|
||||
# include <fcntl.h>
|
||||
# undef MY_LSTAT
|
||||
# define MY_LSTAT stat
|
||||
# undef SET_BINARY_MODE
|
||||
# define SET_BINARY_MODE(fd) \
|
||||
do { \
|
||||
int retVal = setmode ( fileno ( fd ), \
|
||||
O_BINARY ); \
|
||||
ERROR_IF_MINUS_ONE ( retVal ); \
|
||||
} while ( 0 )
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
@ -188,6 +208,9 @@
|
||||
# define MY_STAT _stat
|
||||
# define MY_S_IFREG(x) ((x) & _S_IFREG)
|
||||
|
||||
# define APPEND_FLAG(root, name) \
|
||||
root=snocString((root), (name))
|
||||
|
||||
# if 0
|
||||
/*-- lcc-win32 seems to expand wildcards itself --*/
|
||||
# define APPEND_FILESPEC(root, spec) \
|
||||
@ -254,7 +277,7 @@ typedef int IntNative;
|
||||
|
||||
Int32 verbosity;
|
||||
Bool keepInputFiles, smallMode;
|
||||
Bool forceOverwrite, testFailsExist;
|
||||
Bool forceOverwrite, testFailsExist, noisy;
|
||||
Int32 numFileNames, numFilesProcessed, blockSize100k;
|
||||
|
||||
|
||||
@ -274,8 +297,9 @@ Int32 srcMode;
|
||||
#define FILE_NAME_LEN 1034
|
||||
|
||||
Int32 longestFileName;
|
||||
Char inName[FILE_NAME_LEN];
|
||||
Char inName [FILE_NAME_LEN];
|
||||
Char outName[FILE_NAME_LEN];
|
||||
Char tmpName[FILE_NAME_LEN];
|
||||
Char *progName;
|
||||
Char progNameReally[FILE_NAME_LEN];
|
||||
FILE *outputHandleJustInCase;
|
||||
@ -467,6 +491,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
|
||||
if (streamNo == 1) {
|
||||
return False;
|
||||
} else {
|
||||
if (noisy)
|
||||
fprintf ( stderr,
|
||||
"\n%s: %s: trailing garbage after EOF ignored\n",
|
||||
progName, inName );
|
||||
@ -532,32 +557,31 @@ Bool testStream ( FILE *zStream )
|
||||
|
||||
errhandler:
|
||||
bzReadClose ( &bzerr_dummy, bzf );
|
||||
if (verbosity == 0)
|
||||
fprintf ( stderr, "%s: %s: ", progName, inName );
|
||||
switch (bzerr) {
|
||||
case BZ_IO_ERROR:
|
||||
errhandler_io:
|
||||
ioError(); break;
|
||||
case BZ_DATA_ERROR:
|
||||
fprintf ( stderr,
|
||||
"\n%s: data integrity (CRC) error in data\n",
|
||||
inName );
|
||||
"data integrity (CRC) error in data\n" );
|
||||
return False;
|
||||
case BZ_MEM_ERROR:
|
||||
outOfMemory();
|
||||
case BZ_UNEXPECTED_EOF:
|
||||
fprintf ( stderr,
|
||||
"\n%s: file ends unexpectedly\n",
|
||||
inName );
|
||||
"file ends unexpectedly\n" );
|
||||
return False;
|
||||
case BZ_DATA_ERROR_MAGIC:
|
||||
if (streamNo == 1) {
|
||||
fprintf ( stderr,
|
||||
"\n%s: bad magic number (ie, not created by bzip2)\n",
|
||||
inName );
|
||||
"bad magic number (file not created by bzip2)\n" );
|
||||
return False;
|
||||
} else {
|
||||
if (noisy)
|
||||
fprintf ( stderr,
|
||||
"\n%s: %s: trailing garbage after EOF ignored\n",
|
||||
progName, inName );
|
||||
"trailing garbage after EOF ignored\n" );
|
||||
return True;
|
||||
}
|
||||
default:
|
||||
@ -576,6 +600,7 @@ Bool testStream ( FILE *zStream )
|
||||
/*---------------------------------------------*/
|
||||
void cadvise ( void )
|
||||
{
|
||||
if (noisy)
|
||||
fprintf (
|
||||
stderr,
|
||||
"\nIt is possible that the compressed file(s) have become corrupted.\n"
|
||||
@ -589,6 +614,7 @@ void cadvise ( void )
|
||||
/*---------------------------------------------*/
|
||||
void showFileNames ( void )
|
||||
{
|
||||
if (noisy)
|
||||
fprintf (
|
||||
stderr,
|
||||
"\tInput file = %s, output file = %s\n",
|
||||
@ -603,6 +629,7 @@ void cleanUpAndFail ( Int32 ec )
|
||||
IntNative retVal;
|
||||
|
||||
if ( srcMode == SM_F2F && opMode != OM_TEST ) {
|
||||
if (noisy)
|
||||
fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
|
||||
progName, outName );
|
||||
if (outputHandleJustInCase != NULL)
|
||||
@ -613,7 +640,7 @@ void cleanUpAndFail ( Int32 ec )
|
||||
"%s: WARNING: deletion of output file (apparently) failed.\n",
|
||||
progName );
|
||||
}
|
||||
if (numFileNames > 0 && numFilesProcessed < numFileNames) {
|
||||
if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) {
|
||||
fprintf ( stderr,
|
||||
"%s: WARNING: some files have not been processed:\n"
|
||||
"\t%d specified on command line, %d not processed yet.\n\n",
|
||||
@ -639,7 +666,7 @@ void panic ( Char* s )
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
void crcError ()
|
||||
void crcError ( void )
|
||||
{
|
||||
fprintf ( stderr,
|
||||
"\n%s: Data integrity error when decompressing.\n",
|
||||
@ -665,7 +692,7 @@ void compressedStreamEOF ( void )
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
void ioError ( )
|
||||
void ioError ( void )
|
||||
{
|
||||
fprintf ( stderr,
|
||||
"\n%s: I/O or other error, bailing out. Possible reason follows.\n",
|
||||
@ -680,7 +707,7 @@ void ioError ( )
|
||||
void mySignalCatcher ( IntNative n )
|
||||
{
|
||||
fprintf ( stderr,
|
||||
"\n%s: Control-C (or similar) caught, quitting.\n",
|
||||
"\n%s: Control-C or similar caught, quitting.\n",
|
||||
progName );
|
||||
cleanUpAndFail(1);
|
||||
}
|
||||
@ -740,9 +767,10 @@ void copyFileName ( Char* to, Char* from )
|
||||
if ( strlen(from) > FILE_NAME_LEN-10 ) {
|
||||
fprintf (
|
||||
stderr,
|
||||
"bzip2: file name\n`%s'\nis suspiciously (> 1024 chars) long.\n"
|
||||
"Try using a reasonable file name instead. Sorry! :)\n",
|
||||
from
|
||||
"bzip2: file name\n`%s'\n"
|
||||
"is suspiciously (more than %d chars) long.\n"
|
||||
"Try using a reasonable file name instead. Sorry! :-)\n",
|
||||
from, FILE_NAME_LEN-10
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
@ -778,6 +806,21 @@ Bool notAStandardFile ( Char* name )
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
/*--
|
||||
rac 11/21/98 see if file has hard links to it
|
||||
--*/
|
||||
Int32 countHardLinks ( Char* name )
|
||||
{
|
||||
IntNative i;
|
||||
struct MY_STAT statBuf;
|
||||
|
||||
i = MY_LSTAT ( name, &statBuf );
|
||||
if (i != 0) return 0;
|
||||
return (statBuf.st_nlink - 1);
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName )
|
||||
{
|
||||
@ -793,17 +836,14 @@ void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName )
|
||||
|
||||
retVal = chmod ( dstName, statBuf.st_mode );
|
||||
ERROR_IF_NOT_ZERO ( retVal );
|
||||
/* Not sure if this is really portable or not. Causes
|
||||
problems on my x86-Linux Redhat 5.0 box. Decided
|
||||
to omit it from 0.9.0. JRS, 27 June 98. If you
|
||||
understand Unix file semantics and portability issues
|
||||
well enough to fix this properly, drop me a line
|
||||
at jseward@acm.org.
|
||||
retVal = chown ( dstName, statBuf.st_uid, statBuf.st_gid );
|
||||
ERROR_IF_NOT_ZERO ( retVal );
|
||||
*/
|
||||
|
||||
retVal = utime ( dstName, &uTimBuf );
|
||||
ERROR_IF_NOT_ZERO ( retVal );
|
||||
|
||||
retVal = chown ( dstName, statBuf.st_uid, statBuf.st_gid );
|
||||
/* chown() will in many cases return with EPERM, which can
|
||||
be safely ignored.
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -819,20 +859,6 @@ void setInterimPermissions ( Char *dstName )
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
Bool endsInBz2 ( Char* name )
|
||||
{
|
||||
Int32 n = strlen ( name );
|
||||
if (n <= 4) return False;
|
||||
return
|
||||
(name[n-4] == '.' &&
|
||||
name[n-3] == 'b' &&
|
||||
name[n-2] == 'z' &&
|
||||
name[n-1] == '2');
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
Bool containsDubiousChars ( Char* name )
|
||||
{
|
||||
@ -843,50 +869,95 @@ Bool containsDubiousChars ( Char* name )
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
/*-- Number of entries in each of the two suffix tables below. --*/
#define BZ_N_SUFFIX_PAIRS 4
|
||||
|
||||
/*--
   Suffixes recognised as marking a compressed file name.
   Entry i here corresponds to entry i of unzSuffix.
--*/
Char* zSuffix[BZ_N_SUFFIX_PAIRS]
|
||||
= { ".bz2", ".bz", ".tbz2", ".tbz" };
|
||||
/*--
   Replacement suffix used for the decompressed name when
   the matching zSuffix entry is stripped ("" means the
   suffix is simply removed).
--*/
Char* unzSuffix[BZ_N_SUFFIX_PAIRS]
|
||||
= { "", "", ".tar", ".tar" };
|
||||
|
||||
/*--
   Return True iff the string s ends with the string
   suffix.  An empty suffix matches every string.
--*/
Bool hasSuffix ( Char* s, Char* suffix )
{
   Int32 lenS      = strlen(s);
   Int32 lenSuffix = strlen(suffix);

   /* compare only the tail of s, and only if s is long enough */
   if (lenS >= lenSuffix
       && strcmp(s + lenS - lenSuffix, suffix) == 0)
      return True;

   return False;
}
|
||||
|
||||
/*--
   If name ends in oldSuffix, replace that suffix in place
   with newSuffix and return True.  Otherwise leave name
   untouched and return False.

   The new suffix is appended with strcat, so the caller
   must make sure the buffer behind name is large enough
   when newSuffix is longer than oldSuffix.
--*/
Bool mapSuffix ( Char* name,
                 Char* oldSuffix, Char* newSuffix )
{
   if (hasSuffix(name, oldSuffix)) {
      /* chop off the old suffix, then tack on the new one */
      name[strlen(name) - strlen(oldSuffix)] = 0;
      strcat ( name, newSuffix );
      return True;
   }
   return False;
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
void compress ( Char *name )
|
||||
{
|
||||
FILE *inStr;
|
||||
FILE *outStr;
|
||||
|
||||
FILE *inStr;
|
||||
FILE *outStr;
|
||||
Int32 n, i;
|
||||
if (name == NULL && srcMode != SM_I2O)
|
||||
panic ( "compress: bad modes\n" );
|
||||
|
||||
switch (srcMode) {
|
||||
case SM_I2O: copyFileName ( inName, "(stdin)" );
|
||||
copyFileName ( outName, "(stdout)" ); break;
|
||||
case SM_F2F: copyFileName ( inName, name );
|
||||
copyFileName ( outName, name );
|
||||
strcat ( outName, ".bz2" ); break;
|
||||
case SM_F2O: copyFileName ( inName, name );
|
||||
copyFileName ( outName, "(stdout)" ); break;
|
||||
case SM_I2O:
|
||||
copyFileName ( inName, "(stdin)" );
|
||||
copyFileName ( outName, "(stdout)" );
|
||||
break;
|
||||
case SM_F2F:
|
||||
copyFileName ( inName, name );
|
||||
copyFileName ( outName, name );
|
||||
strcat ( outName, ".bz2" );
|
||||
break;
|
||||
case SM_F2O:
|
||||
copyFileName ( inName, name );
|
||||
copyFileName ( outName, "(stdout)" );
|
||||
break;
|
||||
}
|
||||
|
||||
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
|
||||
if (noisy)
|
||||
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
|
||||
fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
|
||||
progName, inName );
|
||||
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && endsInBz2 ( inName )) {
|
||||
fprintf ( stderr, "%s: Input file name %s ends in `.bz2', skipping.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++) {
|
||||
if (hasSuffix(inName, zSuffix[i])) {
|
||||
if (noisy)
|
||||
fprintf ( stderr,
|
||||
"%s: Input file %s already has %s suffix.\n",
|
||||
progName, inName, zSuffix[i] );
|
||||
return;
|
||||
}
|
||||
}
|
||||
if ( srcMode != SM_I2O && notAStandardFile ( inName )) {
|
||||
fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
|
||||
if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) {
|
||||
if (noisy)
|
||||
fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
|
||||
fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
|
||||
fprintf ( stderr, "%s: Output file %s already exists.\n",
|
||||
progName, outName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode == SM_F2F && !forceOverwrite &&
|
||||
(n=countHardLinks ( inName )) > 0) {
|
||||
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
|
||||
progName, inName, n, n > 1 ? "s" : "" );
|
||||
return;
|
||||
}
|
||||
|
||||
switch ( srcMode ) {
|
||||
|
||||
@ -912,11 +983,12 @@ void compress ( Char *name )
|
||||
progName );
|
||||
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
|
||||
progName, progName );
|
||||
if ( inStr != NULL ) fclose ( inStr );
|
||||
return;
|
||||
};
|
||||
if ( inStr == NULL ) {
|
||||
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
|
||||
progName, inName );
|
||||
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
return;
|
||||
};
|
||||
break;
|
||||
@ -925,13 +997,15 @@ void compress ( Char *name )
|
||||
inStr = fopen ( inName, "rb" );
|
||||
outStr = fopen ( outName, "wb" );
|
||||
if ( outStr == NULL) {
|
||||
fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
|
||||
progName, outName );
|
||||
fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
|
||||
progName, outName, strerror(errno) );
|
||||
if ( inStr != NULL ) fclose ( inStr );
|
||||
return;
|
||||
}
|
||||
if ( inStr == NULL ) {
|
||||
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
|
||||
progName, inName );
|
||||
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
if ( outStr != NULL ) fclose ( outStr );
|
||||
return;
|
||||
};
|
||||
setInterimPermissions ( outName );
|
||||
@ -967,51 +1041,72 @@ void compress ( Char *name )
|
||||
/*---------------------------------------------*/
|
||||
void uncompress ( Char *name )
|
||||
{
|
||||
FILE *inStr;
|
||||
FILE *outStr;
|
||||
Bool magicNumberOK;
|
||||
FILE *inStr;
|
||||
FILE *outStr;
|
||||
Int32 n, i;
|
||||
Bool magicNumberOK;
|
||||
Bool cantGuess;
|
||||
|
||||
if (name == NULL && srcMode != SM_I2O)
|
||||
panic ( "uncompress: bad modes\n" );
|
||||
|
||||
cantGuess = False;
|
||||
switch (srcMode) {
|
||||
case SM_I2O: copyFileName ( inName, "(stdin)" );
|
||||
copyFileName ( outName, "(stdout)" ); break;
|
||||
case SM_F2F: copyFileName ( inName, name );
|
||||
copyFileName ( outName, name );
|
||||
if (endsInBz2 ( outName ))
|
||||
outName [ strlen ( outName ) - 4 ] = '\0';
|
||||
break;
|
||||
case SM_F2O: copyFileName ( inName, name );
|
||||
copyFileName ( outName, "(stdout)" ); break;
|
||||
case SM_I2O:
|
||||
copyFileName ( inName, "(stdin)" );
|
||||
copyFileName ( outName, "(stdout)" );
|
||||
break;
|
||||
case SM_F2F:
|
||||
copyFileName ( inName, name );
|
||||
copyFileName ( outName, name );
|
||||
for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++)
|
||||
if (mapSuffix(outName,zSuffix[i],unzSuffix[i]))
|
||||
goto zzz;
|
||||
cantGuess = True;
|
||||
strcat ( outName, ".out" );
|
||||
break;
|
||||
case SM_F2O:
|
||||
copyFileName ( inName, name );
|
||||
copyFileName ( outName, "(stdout)" );
|
||||
break;
|
||||
}
|
||||
|
||||
zzz:
|
||||
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
|
||||
if (noisy)
|
||||
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
|
||||
fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
|
||||
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
return;
|
||||
}
|
||||
if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) {
|
||||
if (noisy)
|
||||
fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
|
||||
if ( /* srcMode == SM_F2F implied && */ cantGuess ) {
|
||||
if (noisy)
|
||||
fprintf ( stderr,
|
||||
"%s: Input file name %s doesn't end in `.bz2', skipping.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && notAStandardFile ( inName )) {
|
||||
fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
"%s: Can't guess original name for %s -- using %s\n",
|
||||
progName, inName, outName );
|
||||
/* just a warning, no return */
|
||||
}
|
||||
if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
|
||||
fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
|
||||
fprintf ( stderr, "%s: Output file %s already exists.\n",
|
||||
progName, outName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode == SM_F2F && !forceOverwrite &&
|
||||
(n=countHardLinks ( inName ) ) > 0) {
|
||||
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
|
||||
progName, inName, n, n > 1 ? "s" : "" );
|
||||
return;
|
||||
}
|
||||
|
||||
switch ( srcMode ) {
|
||||
|
||||
@ -1032,8 +1127,9 @@ void uncompress ( Char *name )
|
||||
inStr = fopen ( inName, "rb" );
|
||||
outStr = stdout;
|
||||
if ( inStr == NULL ) {
|
||||
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
|
||||
progName, inName );
|
||||
fprintf ( stderr, "%s: Can't open input file %s:%s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
if ( inStr != NULL ) fclose ( inStr );
|
||||
return;
|
||||
};
|
||||
break;
|
||||
@ -1042,13 +1138,15 @@ void uncompress ( Char *name )
|
||||
inStr = fopen ( inName, "rb" );
|
||||
outStr = fopen ( outName, "wb" );
|
||||
if ( outStr == NULL) {
|
||||
fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
|
||||
progName, outName );
|
||||
fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
|
||||
progName, outName, strerror(errno) );
|
||||
if ( inStr != NULL ) fclose ( inStr );
|
||||
return;
|
||||
}
|
||||
if ( inStr == NULL ) {
|
||||
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
|
||||
progName, inName );
|
||||
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
if ( outStr != NULL ) fclose ( outStr );
|
||||
return;
|
||||
};
|
||||
setInterimPermissions ( outName );
|
||||
@ -1091,9 +1189,9 @@ void uncompress ( Char *name )
|
||||
fprintf ( stderr, "done\n" );
|
||||
} else {
|
||||
if (verbosity >= 1)
|
||||
fprintf ( stderr, "not a bzip2 file, skipping.\n" ); else
|
||||
fprintf ( stderr, "not a bzip2 file.\n" ); else
|
||||
fprintf ( stderr,
|
||||
"%s: %s is not a bzip2 file, skipping.\n",
|
||||
"%s: %s is not a bzip2 file.\n",
|
||||
progName, inName );
|
||||
}
|
||||
|
||||
@ -1117,24 +1215,14 @@ void testf ( Char *name )
|
||||
}
|
||||
|
||||
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
|
||||
if (noisy)
|
||||
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
|
||||
fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
|
||||
fprintf ( stderr,
|
||||
"%s: Input file name %s doesn't end in `.bz2', skipping.\n",
|
||||
progName, inName );
|
||||
return;
|
||||
}
|
||||
if ( srcMode != SM_I2O && notAStandardFile ( inName )) {
|
||||
fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
|
||||
progName, inName );
|
||||
fprintf ( stderr, "%s: Can't open input %s: %s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1155,8 +1243,8 @@ void testf ( Char *name )
|
||||
case SM_F2O: case SM_F2F:
|
||||
inStr = fopen ( inName, "rb" );
|
||||
if ( inStr == NULL ) {
|
||||
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
|
||||
progName, inName );
|
||||
fprintf ( stderr, "%s: Can't open input file %s:%s.\n",
|
||||
progName, inName, strerror(errno) );
|
||||
return;
|
||||
};
|
||||
break;
|
||||
@ -1186,13 +1274,13 @@ void license ( void )
|
||||
fprintf ( stderr,
|
||||
|
||||
"bzip2, a block-sorting file compressor. "
|
||||
"Version 0.9.0c, 18-Oct-98.\n"
|
||||
"Version 0.9.5d, 4-Sept-99.\n"
|
||||
" \n"
|
||||
" Copyright (C) 1996, 1997, 1998 by Julian Seward.\n"
|
||||
" Copyright (C) 1996, 1997, 1998, 1999 by Julian Seward.\n"
|
||||
" \n"
|
||||
" This program is free software; you can redistribute it and/or modify\n"
|
||||
" it under the terms set out in the LICENSE file, which is included\n"
|
||||
" in the bzip2-0.9.0c source distribution.\n"
|
||||
" in the bzip2-0.9.5 source distribution.\n"
|
||||
" \n"
|
||||
" This program is distributed in the hope that it will be useful,\n"
|
||||
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
|
||||
@ -1209,27 +1297,26 @@ void usage ( Char *fullProgName )
|
||||
fprintf (
|
||||
stderr,
|
||||
"bzip2, a block-sorting file compressor. "
|
||||
"Version 0.9.0c, 18-Oct-98.\n"
|
||||
"Version 0.9.5d, 4-Sept-99.\n"
|
||||
"\n usage: %s [flags and input files in any order]\n"
|
||||
"\n"
|
||||
" -h --help print this message\n"
|
||||
" -d --decompress force decompression\n"
|
||||
" -z --compress force compression\n"
|
||||
" -k --keep keep (don't delete) input files\n"
|
||||
" -f --force overwrite existing output filess\n"
|
||||
" -f --force overwrite existing output files\n"
|
||||
" -t --test test compressed file integrity\n"
|
||||
" -c --stdout output to standard out\n"
|
||||
" -q --quiet suppress noncritical error messages\n"
|
||||
" -v --verbose be verbose (a 2nd -v gives more)\n"
|
||||
" -L --license display software version & license\n"
|
||||
" -V --version display software version & license\n"
|
||||
" -s --small use less memory (at most 2500k)\n"
|
||||
" -1 .. -9 set block size to 100k .. 900k\n"
|
||||
" --repetitive-fast compress repetitive blocks faster\n"
|
||||
" --repetitive-best compress repetitive blocks better\n"
|
||||
"\n"
|
||||
" If invoked as `bzip2', default action is to compress.\n"
|
||||
" as `bunzip2', default action is to decompress.\n"
|
||||
" as `bz2cat', default action is to decompress to stdout.\n"
|
||||
" as `bzcat', default action is to decompress to stdout.\n"
|
||||
"\n"
|
||||
" If no file names are given, bzip2 compresses or decompresses\n"
|
||||
" from standard input to standard output. You can combine\n"
|
||||
@ -1244,19 +1331,29 @@ void usage ( Char *fullProgName )
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
/*--
   Tell the user, on stderr, that the given command-line
   flag is redundant in this version.  Nothing else is
   done; the flag is neither acted upon nor rejected here.
--*/
void redundant ( Char* flag )
{
   fprintf ( stderr,
             "%s: %s is redundant in versions 0.9.5 and above\n",
             progName, flag );
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
/*--
|
||||
All the garbage from here to main() is purely to
|
||||
implement a linked list of command-line arguments,
|
||||
into which main() copies argv[1 .. argc-1].
|
||||
|
||||
The purpose of this ridiculous exercise is to
|
||||
facilitate the expansion of wildcard characters
|
||||
* and ? in filenames for halfwitted OSs like
|
||||
MSDOS, Windows 95 and NT.
|
||||
The purpose of this exercise is to facilitate
|
||||
the expansion of wildcard characters * and ? in
|
||||
filenames for OSs which don't know how to do it
|
||||
themselves, like MSDOS, Windows 95 and NT.
|
||||
|
||||
The actual Dirty Work is done by the platform-specific
|
||||
macro APPEND_FILESPEC.
|
||||
The actual Dirty Work is done by the platform-
|
||||
specific macro APPEND_FILESPEC.
|
||||
--*/
|
||||
|
||||
typedef
|
||||
@ -1308,8 +1405,34 @@ Cell *snocString ( Cell *root, Char *name )
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
#define ISFLAG(s) (strcmp(aa->name, (s))==0)
|
||||
/*--
   Split the value of environment variable varName into
   whitespace-separated words and append each word to
   *argList via APPEND_FLAG.  Does nothing if the variable
   is not set.

   Each word is copied into the global scratch buffer
   tmpName first; words longer than FILE_NAME_LEN-10
   characters are silently truncated to that length.
   NOTE(review): tmpName is reused for every word, so this
   relies on APPEND_FLAG copying the string -- confirm
   against snocString.
--*/
void addFlagsFromEnvVar ( Cell** argList, Char* varName )
{
   Int32 i, j, k;
   Char *envbase, *p;

   envbase = getenv(varName);
   if (envbase == NULL) return;

   p = envbase;
   i = 0;
   while (p[i] != 0) {
      /* step past the word consumed on the previous pass */
      p += i;
      i = 0;

      /* The <ctype.h> functions require an argument that is
         representable as unsigned char (or EOF); cast through
         unsigned char so that bytes >= 0x80 are safe even if
         Char is a signed type. */
      while (isspace((unsigned char)(p[0]))) p++;
      while (p[i] != 0 && !isspace((unsigned char)(p[i]))) i++;

      if (i > 0) {
         /* clamp the copy so it always fits in tmpName */
         k = i; if (k > FILE_NAME_LEN-10) k = FILE_NAME_LEN-10;
         for (j = 0; j < k; j++) tmpName[j] = p[j];
         tmpName[k] = 0;
         APPEND_FLAG(*argList, tmpName);
      }
   }
}
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
#define ISFLAG(s) (strcmp(aa->name, (s))==0)
|
||||
|
||||
IntNative main ( IntNative argc, Char *argv[] )
|
||||
{
|
||||
@ -1317,6 +1440,7 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
Char *tmp;
|
||||
Cell *argList;
|
||||
Cell *aa;
|
||||
Bool decode;
|
||||
|
||||
/*-- Be really really really paranoid :-) --*/
|
||||
if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
|
||||
@ -1332,27 +1456,27 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
}
|
||||
|
||||
|
||||
/*-- Set up signal handlers --*/
|
||||
signal (SIGINT, mySignalCatcher);
|
||||
signal (SIGTERM, mySignalCatcher);
|
||||
signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
|
||||
#if BZ_UNIX
|
||||
signal (SIGHUP, mySignalCatcher);
|
||||
signal (SIGBUS, mySIGSEGVorSIGBUScatcher);
|
||||
#endif
|
||||
|
||||
|
||||
/*-- Initialise --*/
|
||||
outputHandleJustInCase = NULL;
|
||||
smallMode = False;
|
||||
keepInputFiles = False;
|
||||
forceOverwrite = False;
|
||||
noisy = True;
|
||||
verbosity = 0;
|
||||
blockSize100k = 9;
|
||||
testFailsExist = False;
|
||||
numFileNames = 0;
|
||||
numFilesProcessed = 0;
|
||||
workFactor = 30;
|
||||
i = j = 0; /* avoid bogus warning from egcs-1.1.X */
|
||||
|
||||
/*-- Set up signal handlers for mem access errors --*/
|
||||
signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
|
||||
#if BZ_UNIX
|
||||
#ifndef __DJGPP__
|
||||
signal (SIGBUS, mySIGSEGVorSIGBUScatcher);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
copyFileName ( inName, "(none)" );
|
||||
copyFileName ( outName, "(none)" );
|
||||
@ -1363,8 +1487,12 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
if (*tmp == PATH_SEP) progName = tmp + 1;
|
||||
|
||||
|
||||
/*-- Expand filename wildcards in arg list --*/
|
||||
/*-- Copy flags from env var BZIP2, and
|
||||
expand filename wildcards in arg list.
|
||||
--*/
|
||||
argList = NULL;
|
||||
addFlagsFromEnvVar ( &argList, "BZIP2" );
|
||||
addFlagsFromEnvVar ( &argList, "BZIP" );
|
||||
for (i = 1; i <= argc-1; i++)
|
||||
APPEND_FILESPEC(argList, argv[i]);
|
||||
|
||||
@ -1372,12 +1500,14 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
/*-- Find the length of the longest filename --*/
|
||||
longestFileName = 7;
|
||||
numFileNames = 0;
|
||||
for (aa = argList; aa != NULL; aa = aa->link)
|
||||
if (aa->name[0] != '-') {
|
||||
numFileNames++;
|
||||
if (longestFileName < (Int32)strlen(aa->name) )
|
||||
longestFileName = (Int32)strlen(aa->name);
|
||||
}
|
||||
decode = True;
|
||||
for (aa = argList; aa != NULL; aa = aa->link) {
|
||||
if (ISFLAG("--")) { decode = False; continue; }
|
||||
if (aa->name[0] == '-' && decode) continue;
|
||||
numFileNames++;
|
||||
if (longestFileName < (Int32)strlen(aa->name) )
|
||||
longestFileName = (Int32)strlen(aa->name);
|
||||
}
|
||||
|
||||
|
||||
/*-- Determine source modes; flag handling may change this too. --*/
|
||||
@ -1403,9 +1533,10 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
|
||||
|
||||
/*-- Look at the flags. --*/
|
||||
for (aa = argList; aa != NULL; aa = aa->link)
|
||||
if (aa->name[0] == '-' && aa->name[1] != '-')
|
||||
for (j = 1; aa->name[j] != '\0'; j++)
|
||||
for (aa = argList; aa != NULL; aa = aa->link) {
|
||||
if (ISFLAG("--")) break;
|
||||
if (aa->name[0] == '-' && aa->name[1] != '-') {
|
||||
for (j = 1; aa->name[j] != '\0'; j++) {
|
||||
switch (aa->name[j]) {
|
||||
case 'c': srcMode = SM_F2O; break;
|
||||
case 'd': opMode = OM_UNZ; break;
|
||||
@ -1414,6 +1545,7 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
case 't': opMode = OM_TEST; break;
|
||||
case 'k': keepInputFiles = True; break;
|
||||
case 's': smallMode = True; break;
|
||||
case 'q': noisy = False; break;
|
||||
case '1': blockSize100k = 1; break;
|
||||
case '2': blockSize100k = 2; break;
|
||||
case '3': blockSize100k = 3; break;
|
||||
@ -1427,17 +1559,21 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
case 'L': license(); break;
|
||||
case 'v': verbosity++; break;
|
||||
case 'h': usage ( progName );
|
||||
exit ( 1 );
|
||||
exit ( 0 );
|
||||
break;
|
||||
default: fprintf ( stderr, "%s: Bad flag `%s'\n",
|
||||
progName, aa->name );
|
||||
usage ( progName );
|
||||
exit ( 1 );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*-- And again ... --*/
|
||||
for (aa = argList; aa != NULL; aa = aa->link) {
|
||||
if (ISFLAG("--")) break;
|
||||
if (ISFLAG("--stdout")) srcMode = SM_F2O; else
|
||||
if (ISFLAG("--decompress")) opMode = OM_UNZ; else
|
||||
if (ISFLAG("--compress")) opMode = OM_Z; else
|
||||
@ -1445,12 +1581,14 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
if (ISFLAG("--test")) opMode = OM_TEST; else
|
||||
if (ISFLAG("--keep")) keepInputFiles = True; else
|
||||
if (ISFLAG("--small")) smallMode = True; else
|
||||
if (ISFLAG("--quiet")) noisy = False; else
|
||||
if (ISFLAG("--version")) license(); else
|
||||
if (ISFLAG("--license")) license(); else
|
||||
if (ISFLAG("--repetitive-fast")) workFactor = 5; else
|
||||
if (ISFLAG("--repetitive-best")) workFactor = 150; else
|
||||
if (ISFLAG("--exponential")) workFactor = 1; else
|
||||
if (ISFLAG("--repetitive-best")) redundant(aa->name); else
|
||||
if (ISFLAG("--repetitive-fast")) redundant(aa->name); else
|
||||
if (ISFLAG("--verbose")) verbosity++; else
|
||||
if (ISFLAG("--help")) { usage ( progName ); exit ( 1 ); }
|
||||
if (ISFLAG("--help")) { usage ( progName ); exit ( 0 ); }
|
||||
else
|
||||
if (strncmp ( aa->name, "--", 2) == 0) {
|
||||
fprintf ( stderr, "%s: Bad flag `%s'\n", progName, aa->name );
|
||||
@ -1460,13 +1598,8 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
}
|
||||
|
||||
if (verbosity > 4) verbosity = 4;
|
||||
if (opMode == OM_Z && smallMode) blockSize100k = 2;
|
||||
|
||||
if (srcMode == SM_F2O && numFileNames == 0) {
|
||||
fprintf ( stderr, "%s: -c expects at least one filename.\n",
|
||||
progName );
|
||||
exit ( 1 );
|
||||
}
|
||||
if (opMode == OM_Z && smallMode && blockSize100k > 2)
|
||||
blockSize100k = 2;
|
||||
|
||||
if (opMode == OM_TEST && srcMode == SM_F2O) {
|
||||
fprintf ( stderr, "%s: -c and -t cannot be used together.\n",
|
||||
@ -1474,46 +1607,82 @@ IntNative main ( IntNative argc, Char *argv[] )
|
||||
exit ( 1 );
|
||||
}
|
||||
|
||||
if (srcMode == SM_F2O && numFileNames == 0)
|
||||
srcMode = SM_I2O;
|
||||
|
||||
if (opMode != OM_Z) blockSize100k = 0;
|
||||
|
||||
if (srcMode == SM_F2F) {
|
||||
signal (SIGINT, mySignalCatcher);
|
||||
signal (SIGTERM, mySignalCatcher);
|
||||
# if BZ_UNIX
|
||||
signal (SIGHUP, mySignalCatcher);
|
||||
# endif
|
||||
}
|
||||
|
||||
if (opMode == OM_Z) {
|
||||
if (srcMode == SM_I2O)
|
||||
compress ( NULL );
|
||||
else
|
||||
for (aa = argList; aa != NULL; aa = aa->link)
|
||||
if (aa->name[0] != '-') {
|
||||
numFilesProcessed++;
|
||||
compress ( aa->name );
|
||||
}
|
||||
} else
|
||||
if (srcMode == SM_I2O) {
|
||||
compress ( NULL );
|
||||
} else {
|
||||
decode = True;
|
||||
for (aa = argList; aa != NULL; aa = aa->link) {
|
||||
if (ISFLAG("--")) { decode = False; continue; }
|
||||
if (aa->name[0] == '-' && decode) continue;
|
||||
numFilesProcessed++;
|
||||
compress ( aa->name );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
if (opMode == OM_UNZ) {
|
||||
if (srcMode == SM_I2O)
|
||||
if (srcMode == SM_I2O) {
|
||||
uncompress ( NULL );
|
||||
else
|
||||
for (aa = argList; aa != NULL; aa = aa->link)
|
||||
if (aa->name[0] != '-') {
|
||||
numFilesProcessed++;
|
||||
uncompress ( aa->name );
|
||||
}
|
||||
} else {
|
||||
} else {
|
||||
decode = True;
|
||||
for (aa = argList; aa != NULL; aa = aa->link) {
|
||||
if (ISFLAG("--")) { decode = False; continue; }
|
||||
if (aa->name[0] == '-' && decode) continue;
|
||||
numFilesProcessed++;
|
||||
uncompress ( aa->name );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else {
|
||||
testFailsExist = False;
|
||||
if (srcMode == SM_I2O)
|
||||
if (srcMode == SM_I2O) {
|
||||
testf ( NULL );
|
||||
else
|
||||
for (aa = argList; aa != NULL; aa = aa->link)
|
||||
if (aa->name[0] != '-') {
|
||||
numFilesProcessed++;
|
||||
testf ( aa->name );
|
||||
}
|
||||
if (testFailsExist) {
|
||||
} else {
|
||||
decode = True;
|
||||
for (aa = argList; aa != NULL; aa = aa->link) {
|
||||
if (ISFLAG("--")) { decode = False; continue; }
|
||||
if (aa->name[0] == '-' && decode) continue;
|
||||
numFilesProcessed++;
|
||||
testf ( aa->name );
|
||||
}
|
||||
}
|
||||
if (testFailsExist && noisy) {
|
||||
fprintf ( stderr,
|
||||
"\n"
|
||||
"You can use the `bzip2recover' program to *attempt* to recover\n"
|
||||
"You can use the `bzip2recover' program to attempt to recover\n"
|
||||
"data from undamaged sections of corrupted files.\n\n"
|
||||
);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
/* Free the argument list memory to mollify leak detectors
|
||||
(eg) Purify, Checker. Serves no other useful purpose.
|
||||
*/
|
||||
aa = argList;
|
||||
while (aa != NULL) {
|
||||
Cell* aa2 = aa->link;
|
||||
if (aa->name) free(aa->name);
|
||||
free(aa);
|
||||
aa = aa2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
332
bzip2.txt
332
bzip2.txt
@ -1,22 +1,20 @@
|
||||
|
||||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
NAME
|
||||
bzip2, bunzip2 - a block-sorting file compressor, v0.9.0
|
||||
bzip2, bunzip2 - a block-sorting file compressor, v0.9.5
|
||||
bzcat - decompresses files to stdout
|
||||
bzip2recover - recovers data from damaged bzip2 files
|
||||
|
||||
|
||||
SYNOPSIS
|
||||
bzip2 [ -cdfkstvzVL123456789 ] [ filenames ... ]
|
||||
bzip2 [ -cdfkqstvzVL123456789 ] [ filenames ... ]
|
||||
bunzip2 [ -fkvsVL ] [ filenames ... ]
|
||||
bzcat [ -s ] [ filenames ... ]
|
||||
bzip2recover filename
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
bzip2 compresses files using the Burrows-Wheeler block-
|
||||
bzip2 compresses files using the Burrows-Wheeler block
|
||||
sorting text compression algorithm, and Huffman coding.
|
||||
Compression is generally considerably better than that
|
||||
achieved by more conventional LZ77/LZ78-based compressors,
|
||||
@ -24,22 +22,22 @@ DESCRIPTION
|
||||
tistical compressors.
|
||||
|
||||
The command-line options are deliberately very similar to
|
||||
those of GNU Gzip, but they are not identical.
|
||||
those of GNU gzip, but they are not identical.
|
||||
|
||||
bzip2 expects a list of file names to accompany the com-
|
||||
mand-line flags. Each file is replaced by a compressed
|
||||
version of itself, with the name "original_name.bz2".
|
||||
Each compressed file has the same modification date and
|
||||
permissions as the corresponding original, so that these
|
||||
properties can be correctly restored at decompression
|
||||
time. File name handling is naive in the sense that there
|
||||
is no mechanism for preserving original file names, per-
|
||||
missions and dates in filesystems which lack these con-
|
||||
cepts, or have serious file name length restrictions, such
|
||||
as MS-DOS.
|
||||
Each compressed file has the same modification date, per-
|
||||
missions, and, when possible, ownership as the correspond-
|
||||
ing original, so that these properties can be correctly
|
||||
restored at decompression time. File name handling is
|
||||
naive in the sense that there is no mechanism for preserv-
|
||||
ing original file names, permissions, ownerships or dates
|
||||
in filesystems which lack these concepts, or have serious
|
||||
file name length restrictions, such as MS-DOS.
|
||||
|
||||
bzip2 and bunzip2 will by default not overwrite existing
|
||||
files; if you want this to happen, specify the -f flag.
|
||||
files. If you want this to happen, specify the -f flag.
|
||||
|
||||
If no file names are specified, bzip2 compresses from
|
||||
standard input to standard output. In this case, bzip2
|
||||
@ -47,10 +45,25 @@ DESCRIPTION
|
||||
this would be entirely incomprehensible and therefore
|
||||
pointless.
|
||||
|
||||
bunzip2 (or bzip2 -d ) decompresses and restores all spec-
|
||||
ified files whose names end in ".bz2". Files without this
|
||||
suffix are ignored. Again, supplying no filenames causes
|
||||
decompression from standard input to standard output.
|
||||
bunzip2 (or bzip2 -d) decompresses all specified files.
|
||||
Files which were not created by bzip2 will be detected and
|
||||
ignored, and a warning issued. bzip2 attempts to guess
|
||||
the filename for the decompressed file from that of the
|
||||
compressed file as follows:
|
||||
|
||||
filename.bz2 becomes filename
|
||||
filename.bz becomes filename
|
||||
filename.tbz2 becomes filename.tar
|
||||
filename.tbz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
If the file does not end in one of the recognised endings,
|
||||
.bz2, .bz, .tbz2 or .tbz, bzip2 complains that it cannot
|
||||
guess the name of the original file, and uses the original
|
||||
name with .out appended.
|
||||
|
||||
As with compression, supplying no filenames causes decom-
|
||||
pression from standard input to standard output.
|
||||
|
||||
bunzip2 will correctly decompress a file which is the con-
|
||||
catenation of two or more compressed files. The result is
|
||||
@ -58,19 +71,24 @@ DESCRIPTION
|
||||
Integrity testing (-t) of concatenated compressed files is
|
||||
also supported.
|
||||
|
||||
You can also compress or decompress files to the standard
|
||||
output by giving the -c flag. Multiple files may be com-
|
||||
You can also compress or decompress files to the standard
|
||||
output by giving the -c flag. Multiple files may be com-
|
||||
pressed and decompressed like this. The resulting outputs
|
||||
are fed sequentially to stdout. Compression of multiple
|
||||
files in this manner generates a stream containing multi-
|
||||
are fed sequentially to stdout. Compression of multiple
|
||||
files in this manner generates a stream containing multi-
|
||||
ple compressed file representations. Such a stream can be
|
||||
decompressed correctly only by bzip2 version 0.9.0 or
|
||||
later. Earlier versions of bzip2 will stop after decom-
|
||||
decompressed correctly only by bzip2 version 0.9.0 or
|
||||
later. Earlier versions of bzip2 will stop after decom-
|
||||
pressing the first file in the stream.
|
||||
|
||||
bzcat (or bzip2 -dc ) decompresses all specified files to
|
||||
bzcat (or bzip2 -dc) decompresses all specified files to
|
||||
the standard output.
|
||||
|
||||
bzip2 will read arguments from the environment variables
|
||||
BZIP2 and BZIP, in that order, and will process them
|
||||
before any arguments read from the command line. This
|
||||
gives a convenient way to supply default arguments.
|
||||
|
||||
Compression is always performed, even if the compressed
|
||||
file is slightly larger than the original. Files of less
|
||||
than about one hundred bytes tend to get larger, since the
|
||||
@ -87,98 +105,19 @@ DESCRIPTION
|
||||
corruption going undetected is microscopic, about one
|
||||
chance in four billion for each file processed. Be aware,
|
||||
though, that the check occurs upon decompression, so it
|
||||
can only tell you that that something is wrong. It can't
|
||||
help you recover the original uncompressed data. You can
|
||||
use bzip2recover to try to recover data from damaged
|
||||
files.
|
||||
can only tell you that something is wrong. It can't help
|
||||
you recover the original uncompressed data. You can use
|
||||
bzip2recover to try to recover data from damaged files.
|
||||
|
||||
Return values: 0 for a normal exit, 1 for environmental
|
||||
problems (file not found, invalid flags, I/O errors, &c),
|
||||
Return values: 0 for a normal exit, 1 for environmental
|
||||
problems (file not found, invalid flags, I/O errors, &c),
|
||||
2 to indicate a corrupt compressed file, 3 for an internal
|
||||
consistency error (eg, bug) which caused bzip2 to panic.
|
||||
|
||||
|
||||
MEMORY MANAGEMENT
|
||||
Bzip2 compresses large files in blocks. The block size
|
||||
affects both the compression ratio achieved, and the
|
||||
amount of memory needed both for compression and decom-
|
||||
pression. The flags -1 through -9 specify the block size
|
||||
to be 100,000 bytes through 900,000 bytes (the default)
|
||||
respectively. At decompression-time, the block size used
|
||||
for compression is read from the header of the compressed
|
||||
file, and bunzip2 then allocates itself just enough memory
|
||||
to decompress the file. Since block sizes are stored in
|
||||
compressed files, it follows that the flags -1 to -9 are
|
||||
irrelevant to and so ignored during decompression.
|
||||
|
||||
Compression and decompression requirements, in bytes, can
|
||||
be estimated as:
|
||||
|
||||
Compression: 400k + ( 7 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal
|
||||
returns; most of the compression comes from the first two
|
||||
or three hundred k of block size, a fact worth bearing in
|
||||
mind when using bzip2 on small machines. It is also
|
||||
important to appreciate that the decompression memory
|
||||
requirement is set at compression-time by the choice of
|
||||
block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
bunzip2 will require about 3700 kbytes to decompress. To
|
||||
support decompression of any file on a 4 megabyte machine,
|
||||
bunzip2 has an option to decompress using approximately
|
||||
half this amount of memory, about 2300 kbytes. Decompres-
|
||||
sion speed is also halved, so you should use this option
|
||||
only where necessary. The relevant flag is -s.
|
||||
|
||||
In general, try and use the largest block size memory con-
|
||||
straints allow, since that maximises the compression
|
||||
achieved. Compression and decompression speed are virtu-
|
||||
ally unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a
|
||||
single block -- that means most files you'd encounter
|
||||
using a large block size. The amount of real memory
|
||||
touched is proportional to the size of the file, since the
|
||||
file is smaller than a block. For example, compressing a
|
||||
file 20,000 bytes long with the flag -9 will cause the
|
||||
compressor to allocate around 6700k of memory, but only
|
||||
touch 400k + 20000 * 7 = 540 kbytes of it. Similarly, the
|
||||
decompressor will allocate 3700k but only touch 100k +
|
||||
20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage
|
||||
for different block sizes. Also recorded is the total
|
||||
compressed size for 14 files of the Calgary Text Compres-
|
||||
sion Corpus totalling 3,141,622 bytes. This column gives
|
||||
some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger
|
||||
block sizes for larger files, since the Corpus is domi-
|
||||
nated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1100k 500k 350k 914704
|
||||
-2 1800k 900k 600k 877703
|
||||
-3 2500k 1300k 850k 860338
|
||||
-4 3200k 1700k 1100k 846899
|
||||
-5 3900k 2100k 1350k 845160
|
||||
-6 4600k 2500k 1600k 838626
|
||||
-7 5400k 2900k 1850k 834096
|
||||
-8 6000k 3300k 2100k 828642
|
||||
-9 6700k 3700k 2350k 828642
|
||||
|
||||
|
||||
OPTIONS
|
||||
-c --stdout
|
||||
Compress or decompress to standard output. -c will
|
||||
decompress multiple files to stdout, but will only
|
||||
compress a single file to stdout.
|
||||
Compress or decompress to standard output.
|
||||
|
||||
-d --decompress
|
||||
Force decompression. bzip2, bunzip2 and bzcat are
|
||||
@ -198,7 +137,9 @@ OPTIONS
|
||||
|
||||
-f --force
|
||||
Force overwrite of output files. Normally, bzip2
|
||||
will not overwrite existing output files.
|
||||
will not overwrite existing output files. Also
|
||||
forces bzip2 to break hard links to files, which it
|
||||
otherwise wouldn't do.
|
||||
|
||||
-k --keep
|
||||
Keep (don't delete) input files during compression
|
||||
@ -217,7 +158,12 @@ OPTIONS
|
||||
figure, at the expense of your compression ratio.
|
||||
In short, if your machine is low on memory (8
|
||||
megabytes or less), use -s for everything. See
|
||||
MEMORY MANAGEMENT above.
|
||||
MEMORY MANAGEMENT below.
|
||||
|
||||
-q --quiet
|
||||
Suppress non-essential warning messages. Messages
|
||||
pertaining to I/O errors and other critical events
|
||||
will not be suppressed.
|
||||
|
||||
-v --verbose
|
||||
Verbose mode -- show the compression ratio for each
|
||||
@ -232,21 +178,96 @@ OPTIONS
|
||||
-1 to -9
|
||||
Set the block size to 100 k, 200 k .. 900 k when
|
||||
compressing. Has no effect when decompressing.
|
||||
See MEMORY MANAGEMENT above.
|
||||
See MEMORY MANAGEMENT below.
|
||||
|
||||
--repetitive-fast
|
||||
bzip2 injects some small pseudo-random variations
|
||||
into very repetitive blocks to limit worst-case
|
||||
performance during compression. If sorting runs
|
||||
into difficulties, the block is randomised, and
|
||||
sorting is restarted. Very roughly, bzip2 persists
|
||||
for three times as long as a well-behaved input
|
||||
would take before resorting to randomisation. This
|
||||
flag makes it give up much sooner.
|
||||
-- Treats all subsequent arguments as file names, even
|
||||
if they start with a dash. This is so you can han-
|
||||
dle files with names beginning with a dash, for
|
||||
example: bzip2 -- -myfilename.
|
||||
|
||||
--repetitive-best
|
||||
Opposite of --repetitive-fast; try a lot harder
|
||||
before resorting to randomisation.
|
||||
--repetitive-fast --repetitive-best
|
||||
These flags are redundant in versions 0.9.5 and
|
||||
above. They provided some coarse control over the
|
||||
behaviour of the sorting algorithm in earlier ver-
|
||||
sions, which was sometimes useful. 0.9.5 and above
|
||||
have an improved algorithm which renders these
|
||||
flags irrelevant.
|
||||
|
||||
|
||||
MEMORY MANAGEMENT
|
||||
bzip2 compresses large files in blocks. The block size
|
||||
affects both the compression ratio achieved, and the
|
||||
amount of memory needed for compression and decompression.
|
||||
The flags -1 through -9 specify the block size to be
|
||||
100,000 bytes through 900,000 bytes (the default) respec-
|
||||
tively. At decompression time, the block size used for
|
||||
compression is read from the header of the compressed
|
||||
file, and bunzip2 then allocates itself just enough memory
|
||||
to decompress the file. Since block sizes are stored in
|
||||
compressed files, it follows that the flags -1 to -9 are
|
||||
irrelevant to and so ignored during decompression.
|
||||
|
||||
Compression and decompression requirements, in bytes, can
|
||||
be estimated as:
|
||||
|
||||
Compression: 400k + ( 8 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal
|
||||
returns. Most of the compression comes from the first two
|
||||
or three hundred k of block size, a fact worth bearing in
|
||||
mind when using bzip2 on small machines. It is also
|
||||
important to appreciate that the decompression memory
|
||||
requirement is set at compression time by the choice of
|
||||
block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
bunzip2 will require about 3700 kbytes to decompress. To
|
||||
support decompression of any file on a 4 megabyte machine,
|
||||
bunzip2 has an option to decompress using approximately
|
||||
half this amount of memory, about 2300 kbytes. Decompres-
|
||||
sion speed is also halved, so you should use this option
|
||||
only where necessary. The relevant flag is -s.
|
||||
|
||||
In general, try and use the largest block size memory con-
|
||||
straints allow, since that maximises the compression
|
||||
achieved. Compression and decompression speed are virtu-
|
||||
ally unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a
|
||||
single block -- that means most files you'd encounter
|
||||
using a large block size. The amount of real memory
|
||||
touched is proportional to the size of the file, since the
|
||||
file is smaller than a block. For example, compressing a
|
||||
file 20,000 bytes long with the flag -9 will cause the
|
||||
compressor to allocate around 7600k of memory, but only
|
||||
touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the
|
||||
decompressor will allocate 3700k but only touch 100k +
|
||||
20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage
|
||||
for different block sizes. Also recorded is the total
|
||||
compressed size for 14 files of the Calgary Text Compres-
|
||||
sion Corpus totalling 3,141,622 bytes. This column gives
|
||||
some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger
|
||||
block sizes for larger files, since the Corpus is domi-
|
||||
nated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1200k 500k 350k 914704
|
||||
-2 2000k 900k 600k 877703
|
||||
-3 2800k 1300k 850k 860338
|
||||
-4 3600k 1700k 1100k 846899
|
||||
-5 4400k 2100k 1350k 845160
|
||||
-6 5200k 2500k 1600k 838626
|
||||
-7 6100k 2900k 1850k 834096
|
||||
-8 6800k 3300k 2100k 828642
|
||||
-9 7600k 3700k 2350k 828642
|
||||
|
||||
|
||||
RECOVERING DATA FROM DAMAGED FILES
|
||||
@ -273,8 +294,8 @@ RECOVERING DATA FROM DAMAGED FILES
|
||||
"rec0002file.bz2", etc, containing the extracted blocks.
|
||||
The output filenames are designed so that the use of
|
||||
wildcards in subsequent processing -- for example, "bzip2
|
||||
-dc rec*file.bz2 > recovered_data" -- lists the files in
|
||||
the "right" order.
|
||||
-dc rec*file.bz2 > recovered_data" -- lists the files in
|
||||
the correct order.
|
||||
|
||||
bzip2recover should be of most use dealing with large .bz2
|
||||
files, as these will contain many blocks. It is clearly
|
||||
@ -289,17 +310,15 @@ PERFORMANCE NOTES
|
||||
The sorting phase of compression gathers together similar
|
||||
strings in the file. Because of this, files containing
|
||||
very long runs of repeated symbols, like "aabaabaabaab
|
||||
..." (repeated several hundred times) may compress
|
||||
extraordinarily slowly. You can use the -vvvvv option to
|
||||
monitor progress in great detail, if you want. Decompres-
|
||||
sion speed is unaffected.
|
||||
..." (repeated several hundred times) may compress more
|
||||
slowly than normal. Versions 0.9.5 and above fare much
|
||||
better than previous versions in this respect. The ratio
|
||||
between worst-case and average-case compression time is in
|
||||
the region of 10:1. For previous versions, this figure
|
||||
was more like 100:1. You can use the -vvvv option to mon-
|
||||
itor progress in great detail, if you want.
|
||||
|
||||
Such pathological cases seem rare in practice, appearing
|
||||
mostly in artificially-constructed test files, and in low-
|
||||
level disk images. It may be inadvisable to use bzip2 to
|
||||
compress the latter. If you do get a file which causes
|
||||
severe slowness in compression, try making the block size
|
||||
as small as possible, with flag -1.
|
||||
Decompression speed is unaffected by these phenomena.
|
||||
|
||||
bzip2 usually allocates several megabytes of memory to
|
||||
operate in, and then charges all over it in a fairly ran-
|
||||
@ -314,42 +333,43 @@ PERFORMANCE NOTES
|
||||
|
||||
CAVEATS
|
||||
I/O error messages are not as helpful as they could be.
|
||||
Bzip2 tries hard to detect I/O errors and exit cleanly,
|
||||
bzip2 tries hard to detect I/O errors and exit cleanly,
|
||||
but the details of what the problem is sometimes seem
|
||||
rather misleading.
|
||||
|
||||
This manual page pertains to version 0.9.0 of bzip2. Com-
|
||||
This manual page pertains to version 0.9.5 of bzip2. Com-
|
||||
pressed data created by this version is entirely forwards
|
||||
and backwards compatible with the previous public release,
|
||||
version 0.1pl2, but with the following exception: 0.9.0
|
||||
can correctly decompress multiple concatenated compressed
|
||||
files. 0.1pl2 cannot do this; it will stop after decom-
|
||||
pressing just the first file in the stream.
|
||||
and backwards compatible with the previous public
|
||||
releases, versions 0.1pl2 and 0.9.0, but with the follow-
|
||||
ing exception: 0.9.0 and above can correctly decompress
|
||||
multiple concatenated compressed files. 0.1pl2 cannot do
|
||||
this; it will stop after decompressing just the first file
|
||||
in the stream.
|
||||
|
||||
Wildcard expansion for Windows 95 and NT is flaky.
|
||||
|
||||
bzip2recover uses 32-bit integers to represent bit posi-
|
||||
tions in compressed files, so it cannot handle compressed
|
||||
files more than 512 megabytes long. This could easily be
|
||||
bzip2recover uses 32-bit integers to represent bit posi-
|
||||
tions in compressed files, so it cannot handle compressed
|
||||
files more than 512 megabytes long. This could easily be
|
||||
fixed.
|
||||
|
||||
|
||||
AUTHOR
|
||||
Julian Seward, jseward@acm.org.
|
||||
|
||||
http://www.muraroa.demon.co.uk
|
||||
|
||||
The ideas embodied in bzip2 are due to (at least) the fol-
|
||||
lowing people: Michael Burrows and David Wheeler (for the
|
||||
block sorting transformation), David Wheeler (again, for
|
||||
lowing people: Michael Burrows and David Wheeler (for the
|
||||
block sorting transformation), David Wheeler (again, for
|
||||
the Huffman coder), Peter Fenwick (for the structured cod-
|
||||
ing model in the original bzip, and many refinements), and
|
||||
Alistair Moffat, Radford Neal and Ian Witten (for the
|
||||
Alistair Moffat, Radford Neal and Ian Witten (for the
|
||||
arithmetic coder in the original bzip). I am much
|
||||
indebted for their help, support and advice. See the man-
|
||||
ual in the source distribution for pointers to sources of
|
||||
ual in the source distribution for pointers to sources of
|
||||
documentation. Christian von Roques encouraged me to look
|
||||
for faster sorting algorithms, so as to speed up compres-
|
||||
for faster sorting algorithms, so as to speed up compres-
|
||||
sion. Bela Lubkin encouraged me to improve the worst-case
|
||||
compression performance. Many people sent patches, helped
|
||||
with portability problems, lent machines, gave advice and
|
||||
with portability problems, lent machines, gave advice and
|
||||
were generally helpful.
|
||||
|
||||
|
@ -7,9 +7,9 @@
|
||||
/*--
|
||||
This program is bzip2recover, a program to attempt data
|
||||
salvage from damaged files created by the accompanying
|
||||
bzip2-0.9.0c program.
|
||||
bzip2-0.9.5 program.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -42,9 +42,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
--*/
|
||||
|
||||
/*--
|
||||
@ -260,6 +260,12 @@ Bool endsInBz2 ( Char* name )
|
||||
#define BLOCK_ENDMARK_HI 0x00001772UL
|
||||
#define BLOCK_ENDMARK_LO 0x45385090UL
|
||||
|
||||
|
||||
UInt32 bStart[20000];
|
||||
UInt32 bEnd[20000];
|
||||
UInt32 rbStart[20000];
|
||||
UInt32 rbEnd[20000];
|
||||
|
||||
Int32 main ( Int32 argc, Char** argv )
|
||||
{
|
||||
FILE* inFile;
|
||||
@ -267,11 +273,6 @@ Int32 main ( Int32 argc, Char** argv )
|
||||
BitStream* bsIn, *bsWr;
|
||||
Int32 currBlock, b, wrBlock;
|
||||
UInt32 bitsRead;
|
||||
UInt32 bStart[20000];
|
||||
UInt32 bEnd[20000];
|
||||
|
||||
UInt32 rbStart[20000];
|
||||
UInt32 rbEnd[20000];
|
||||
Int32 rbCtr;
|
||||
|
||||
|
||||
@ -281,7 +282,7 @@ Int32 main ( Int32 argc, Char** argv )
|
||||
strcpy ( progName, argv[0] );
|
||||
inFileName[0] = outFileName[0] = 0;
|
||||
|
||||
fprintf ( stderr, "bzip2recover v0.9.0c: extracts blocks from damaged .bz2 files.\n" );
|
||||
fprintf ( stderr, "bzip2recover 0.9.5d: extracts blocks from damaged .bz2 files.\n" );
|
||||
|
||||
if (argc != 2) {
|
||||
fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
|
||||
|
134
bzlib.c
134
bzlib.c
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
@ -86,14 +86,14 @@
|
||||
void bz__AssertH__fail ( int errcode )
|
||||
{
|
||||
fprintf(stderr,
|
||||
"\n\nbzip2/libbzip2, v0.9.0c: internal error number %d.\n"
|
||||
"This is a bug in bzip2/libbzip2, v0.9.0c. Please report\n"
|
||||
"\n\nbzip2/libbzip2, v0.9.5d: internal error number %d.\n"
|
||||
"This is a bug in bzip2/libbzip2, v0.9.5d. Please report\n"
|
||||
"it to me at: jseward@acm.org. If this happened when\n"
|
||||
"you were using some program which uses libbzip2 as a\n"
|
||||
"component, you should also report this bug to the author(s)\n"
|
||||
"of that program. Please make an effort to report this bug;\n"
|
||||
"timely and accurate bug reports eventually lead to higher\n"
|
||||
"quality software. Thx. Julian Seward, 18 October 1998.\n\n",
|
||||
"quality software. Thanks. Julian Seward, 4 Sept 1999.\n\n",
|
||||
errcode
|
||||
);
|
||||
exit(3);
|
||||
@ -171,29 +171,23 @@ int BZ_API(bzCompressInit)
|
||||
if (s == NULL) return BZ_MEM_ERROR;
|
||||
s->strm = strm;
|
||||
|
||||
s->block = NULL;
|
||||
s->quadrant = NULL;
|
||||
s->zptr = NULL;
|
||||
s->ftab = NULL;
|
||||
s->arr1 = NULL;
|
||||
s->arr2 = NULL;
|
||||
s->ftab = NULL;
|
||||
|
||||
n = 100000 * blockSize100k;
|
||||
s->block = BZALLOC( (n + BZ_NUM_OVERSHOOT_BYTES) * sizeof(UChar) );
|
||||
s->quadrant = BZALLOC( (n + BZ_NUM_OVERSHOOT_BYTES) * sizeof(Int16) );
|
||||
s->zptr = BZALLOC( n * sizeof(Int32) );
|
||||
s->ftab = BZALLOC( 65537 * sizeof(Int32) );
|
||||
n = 100000 * blockSize100k;
|
||||
s->arr1 = BZALLOC( n * sizeof(UInt32) );
|
||||
s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) );
|
||||
s->ftab = BZALLOC( 65537 * sizeof(UInt32) );
|
||||
|
||||
if (s->block == NULL || s->quadrant == NULL ||
|
||||
s->zptr == NULL || s->ftab == NULL) {
|
||||
if (s->block != NULL) BZFREE(s->block);
|
||||
if (s->quadrant != NULL) BZFREE(s->quadrant);
|
||||
if (s->zptr != NULL) BZFREE(s->zptr);
|
||||
if (s->ftab != NULL) BZFREE(s->ftab);
|
||||
if (s != NULL) BZFREE(s);
|
||||
if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {
|
||||
if (s->arr1 != NULL) BZFREE(s->arr1);
|
||||
if (s->arr2 != NULL) BZFREE(s->arr2);
|
||||
if (s->ftab != NULL) BZFREE(s->ftab);
|
||||
if (s != NULL) BZFREE(s);
|
||||
return BZ_MEM_ERROR;
|
||||
}
|
||||
|
||||
s->szptr = (UInt16*)(s->zptr);
|
||||
|
||||
s->blockNo = 0;
|
||||
s->state = BZ_S_INPUT;
|
||||
s->mode = BZ_M_RUNNING;
|
||||
@ -202,7 +196,12 @@ int BZ_API(bzCompressInit)
|
||||
s->nblockMAX = 100000 * blockSize100k - 19;
|
||||
s->verbosity = verbosity;
|
||||
s->workFactor = workFactor;
|
||||
s->nBlocksRandomised = 0;
|
||||
|
||||
s->block = (UInt16*)s->arr2;
|
||||
s->mtfv = (UInt16*)s->arr1;
|
||||
s->zbits = NULL;
|
||||
s->ptr = (UInt32*)s->arr1;
|
||||
|
||||
strm->state = s;
|
||||
strm->total_in = 0;
|
||||
strm->total_out = 0;
|
||||
@ -224,24 +223,24 @@ void add_pair_to_block ( EState* s )
|
||||
s->inUse[s->state_in_ch] = True;
|
||||
switch (s->state_in_len) {
|
||||
case 1:
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
break;
|
||||
case 2:
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
break;
|
||||
case 3:
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
break;
|
||||
default:
|
||||
s->inUse[s->state_in_len-4] = True;
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UChar)(s->state_in_len-4);
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
s->block[s->nblock] = (UInt16)ch; s->nblock++;
|
||||
s->block[s->nblock] = ((UInt16)(s->state_in_len-4));
|
||||
s->nblock++;
|
||||
break;
|
||||
}
|
||||
@ -267,7 +266,7 @@ void flush_RL ( EState* s )
|
||||
UChar ch = (UChar)(zs->state_in_ch); \
|
||||
BZ_UPDATE_CRC( zs->blockCRC, ch ); \
|
||||
zs->inUse[zs->state_in_ch] = True; \
|
||||
zs->block[zs->nblock] = (UChar)ch; \
|
||||
zs->block[zs->nblock] = (UInt16)ch; \
|
||||
zs->nblock++; \
|
||||
zs->state_in_ch = zchh; \
|
||||
} \
|
||||
@ -343,7 +342,7 @@ Bool copy_output_until_stop ( EState* s )
|
||||
if (s->state_out_pos >= s->numZ) break;
|
||||
|
||||
progress_out = True;
|
||||
*(s->strm->next_out) = ((UChar*)(s->quadrant))[s->state_out_pos];
|
||||
*(s->strm->next_out) = s->zbits[s->state_out_pos];
|
||||
s->state_out_pos++;
|
||||
s->strm->avail_out--;
|
||||
s->strm->next_out++;
|
||||
@ -382,7 +381,7 @@ Bool handle_compress ( bz_stream* strm )
|
||||
progress_in |= copy_input_until_stop ( s );
|
||||
if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
|
||||
flush_RL ( s );
|
||||
compressBlock ( s, s->mode == BZ_M_FINISHING );
|
||||
compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
|
||||
s->state = BZ_S_OUTPUT;
|
||||
}
|
||||
else
|
||||
@ -470,10 +469,9 @@ int BZ_API(bzCompressEnd) ( bz_stream *strm )
|
||||
if (s == NULL) return BZ_PARAM_ERROR;
|
||||
if (s->strm != strm) return BZ_PARAM_ERROR;
|
||||
|
||||
if (s->block != NULL) BZFREE(s->block);
|
||||
if (s->quadrant != NULL) BZFREE(s->quadrant);
|
||||
if (s->zptr != NULL) BZFREE(s->zptr);
|
||||
if (s->ftab != NULL) BZFREE(s->ftab);
|
||||
if (s->arr1 != NULL) BZFREE(s->arr1);
|
||||
if (s->arr2 != NULL) BZFREE(s->arr2);
|
||||
if (s->ftab != NULL) BZFREE(s->ftab);
|
||||
BZFREE(strm->state);
|
||||
|
||||
strm->state = NULL;
|
||||
@ -816,7 +814,8 @@ int BZ_API(bzDecompress) ( bz_stream *strm )
|
||||
}
|
||||
|
||||
AssertH ( 0, 6001 );
|
||||
/*notreached*/
|
||||
|
||||
return 0; /*NOTREACHED*/
|
||||
}
|
||||
|
||||
|
||||
@ -1284,7 +1283,7 @@ int BZ_API(bzBuffToBuffDecompress)
|
||||
|
||||
errhandler:
|
||||
bzDecompressEnd ( &strm );
|
||||
return BZ_SEQUENCE_ERROR;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -1339,24 +1338,18 @@ BZFILE * bzopen_or_bzdopen
|
||||
int smallMode = 0;
|
||||
int nUnused = 0;
|
||||
|
||||
if(mode==NULL){return NULL;}
|
||||
while(*mode){
|
||||
switch(*mode){
|
||||
if (mode == NULL) return NULL;
|
||||
while (*mode) {
|
||||
switch (*mode) {
|
||||
case 'r':
|
||||
writing = 0;break;
|
||||
writing = 0; break;
|
||||
case 'w':
|
||||
writing = 1;break;
|
||||
writing = 1; break;
|
||||
case 's':
|
||||
smallMode = 1;break;
|
||||
smallMode = 1; break;
|
||||
default:
|
||||
if(isdigit(*mode)){
|
||||
blockSize100k = 0;
|
||||
while(isdigit(*mode)){
|
||||
blockSize100k = blockSize100k*10 + *mode-'0';
|
||||
mode++;
|
||||
}
|
||||
}else{
|
||||
/* ignore */
|
||||
if (isdigit((int)(*mode))) {
|
||||
blockSize100k = *mode-'0';
|
||||
}
|
||||
}
|
||||
mode++;
|
||||
@ -1364,29 +1357,32 @@ BZFILE * bzopen_or_bzdopen
|
||||
strcat(mode2, writing ? "w" : "r" );
|
||||
strcat(mode2,"b"); /* binary mode */
|
||||
|
||||
if(open_mode==0){
|
||||
if(path==NULL || strcmp(path,"")==0){
|
||||
if (open_mode==0) {
|
||||
if (path==NULL || strcmp(path,"")==0) {
|
||||
fp = (writing ? stdout : stdin);
|
||||
SET_BINARY_MODE(fp);
|
||||
}else{
|
||||
} else {
|
||||
fp = fopen(path,mode2);
|
||||
}
|
||||
}else{
|
||||
} else {
|
||||
#ifdef BZ_STRICT_ANSI
|
||||
fp = NULL;
|
||||
#else
|
||||
fp = fdopen(fd,mode2);
|
||||
#endif
|
||||
}
|
||||
if(fp==NULL){return NULL;}
|
||||
if (fp == NULL) return NULL;
|
||||
|
||||
if(writing){
|
||||
if (writing) {
|
||||
/* Guard against total chaos and anarchy -- JRS */
|
||||
if (blockSize100k < 1) blockSize100k = 1;
|
||||
if (blockSize100k > 9) blockSize100k = 9;
|
||||
bzfp = bzWriteOpen(&bzerr,fp,blockSize100k,verbosity,workFactor);
|
||||
}else{
|
||||
} else {
|
||||
bzfp = bzReadOpen(&bzerr,fp,verbosity,smallMode,unused,nUnused);
|
||||
}
|
||||
if(bzfp==NULL){
|
||||
if(fp!=stdin && fp!=stdout) fclose(fp);
|
||||
if (bzfp == NULL) {
|
||||
if (fp != stdin && fp != stdout) fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
return bzfp;
|
||||
@ -1458,7 +1454,7 @@ void BZ_API(bzclose) (BZFILE* b)
|
||||
int bzerr;
|
||||
FILE *fp = ((bzFile *)b)->handle;
|
||||
|
||||
if(b==NULL){return;}
|
||||
if (b==NULL) {return;}
|
||||
if(((bzFile*)b)->writing){
|
||||
bzWriteClose(&bzerr,b,0,NULL,NULL);
|
||||
if(bzerr != BZ_OK){
|
||||
|
13
bzlib.h
13
bzlib.h
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
@ -62,6 +62,10 @@
|
||||
#ifndef _BZLIB_H
|
||||
#define _BZLIB_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BZ_RUN 0
|
||||
#define BZ_FLUSH 1
|
||||
#define BZ_FINISH 2
|
||||
@ -291,6 +295,9 @@ BZ_EXTERN const char * BZ_API(bzerror) (
|
||||
);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
@ -76,7 +76,7 @@
|
||||
|
||||
/*-- General stuff. --*/
|
||||
|
||||
#define BZ_VERSION "0.9.0c"
|
||||
#define BZ_VERSION "0.9.5d"
|
||||
|
||||
typedef char Char;
|
||||
typedef unsigned char Bool;
|
||||
@ -210,7 +210,11 @@ extern UInt32 crc32Table[256];
|
||||
#define BZ_S_OUTPUT 1
|
||||
#define BZ_S_INPUT 2
|
||||
|
||||
#define BZ_NUM_OVERSHOOT_BYTES 20
|
||||
#define BZ_N_RADIX 2
|
||||
#define BZ_N_QSORT 12
|
||||
#define BZ_N_SHELL 18
|
||||
#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
|
||||
|
||||
|
||||
|
||||
|
||||
@ -230,18 +234,20 @@ typedef
|
||||
UInt32 avail_in_expect;
|
||||
|
||||
/* for doing the block sorting */
|
||||
UChar* block;
|
||||
UInt16* quadrant;
|
||||
UInt32* zptr;
|
||||
UInt16* szptr;
|
||||
Int32* ftab;
|
||||
Int32 workDone;
|
||||
Int32 workLimit;
|
||||
Int32 workFactor;
|
||||
Bool firstAttempt;
|
||||
Bool blockRandomised;
|
||||
UInt32* arr1;
|
||||
UInt32* arr2;
|
||||
UInt32* ftab;
|
||||
Int32 origPtr;
|
||||
|
||||
/* aliases for arr1 and arr2 */
|
||||
UInt32* ptr;
|
||||
UInt16* block;
|
||||
UInt16* mtfv;
|
||||
UChar* zbits;
|
||||
|
||||
/* for deciding when to use the fallback sorting algorithm */
|
||||
Int32 workFactor;
|
||||
|
||||
/* run-length-encoding of the input */
|
||||
UInt32 state_in_ch;
|
||||
Int32 state_in_len;
|
||||
@ -269,7 +275,6 @@ typedef
|
||||
/* misc administratium */
|
||||
Int32 verbosity;
|
||||
Int32 blockNo;
|
||||
Int32 nBlocksRandomised;
|
||||
Int32 blockSize100k;
|
||||
|
||||
/* stuff for coding the MTF values */
|
||||
@ -478,17 +483,17 @@ typedef
|
||||
}
|
||||
|
||||
#define GET_LL4(i) \
|
||||
(((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4) & 0xF)
|
||||
((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
|
||||
|
||||
#define SET_LL(i,n) \
|
||||
#define SET_LL(i,n) \
|
||||
{ s->ll16[i] = (UInt16)(n & 0x0000ffff); \
|
||||
SET_LL4(i, n >> 16); \
|
||||
SET_LL4(i, n >> 16); \
|
||||
}
|
||||
|
||||
#define GET_LL(i) \
|
||||
(((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
|
||||
|
||||
#define BZ_GET_SMALL(cccc) \
|
||||
#define BZ_GET_SMALL(cccc) \
|
||||
cccc = indexIntoF ( s->tPos, s->cftab ); \
|
||||
s->tPos = GET_LL(s->tPos);
|
||||
|
||||
|
141
compress.c
141
compress.c
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0 of 28 June 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
@ -90,7 +90,7 @@ static
|
||||
void bsFinishWrite ( EState* s )
|
||||
{
|
||||
while (s->bsLive > 0) {
|
||||
((UChar*)(s->quadrant))[s->numZ] = (UChar)(s->bsBuff >> 24);
|
||||
s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
|
||||
s->numZ++;
|
||||
s->bsBuff <<= 8;
|
||||
s->bsLive -= 8;
|
||||
@ -102,7 +102,7 @@ void bsFinishWrite ( EState* s )
|
||||
#define bsNEEDW(nz) \
|
||||
{ \
|
||||
while (s->bsLive >= 8) { \
|
||||
((UChar*)(s->quadrant))[s->numZ] \
|
||||
s->zbits[s->numZ] \
|
||||
= (UChar)(s->bsBuff >> 24); \
|
||||
s->numZ++; \
|
||||
s->bsBuff <<= 8; \
|
||||
@ -162,13 +162,39 @@ void makeMaps_e ( EState* s )
|
||||
static
|
||||
void generateMTFValues ( EState* s )
|
||||
{
|
||||
UChar yy[256];
|
||||
Int32 i, j;
|
||||
UChar tmp;
|
||||
UChar tmp2;
|
||||
Int32 zPend;
|
||||
Int32 wr;
|
||||
Int32 EOB;
|
||||
UChar yy[256];
|
||||
Int32 i, j;
|
||||
UChar tmp;
|
||||
UChar tmp2;
|
||||
Int32 zPend;
|
||||
Int32 wr;
|
||||
Int32 EOB;
|
||||
|
||||
/*
|
||||
After sorting (eg, here),
|
||||
s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
|
||||
and
|
||||
((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8]
|
||||
holds the original block data.
|
||||
|
||||
The first thing to do is generate the MTF values,
|
||||
and put them in
|
||||
((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
|
||||
Because there are strictly fewer or equal MTF values
|
||||
than block values, ptr values in this area are overwritten
|
||||
with MTF values only when they are no longer needed.
|
||||
|
||||
The final compressed bitstream is generated into the
|
||||
area starting at
|
||||
(UChar*) (&((UInt16)s->arr2)[s->nblock])
|
||||
|
||||
These storage aliases are set up in bzCompressInit(),
|
||||
except for the last one, which is arranged in
|
||||
compressBlock().
|
||||
*/
|
||||
UInt32* ptr = s->ptr;
|
||||
UInt16* block = s->block;
|
||||
UInt16* mtfv = s->mtfv;
|
||||
|
||||
makeMaps_e ( s );
|
||||
EOB = s->nInUse+1;
|
||||
@ -183,52 +209,61 @@ void generateMTFValues ( EState* s )
|
||||
UChar ll_i;
|
||||
|
||||
AssertD ( wr <= i, "generateMTFValues(1)" );
|
||||
j = s->zptr[i]-1; if (j < 0) j += s->nblock;
|
||||
ll_i = s->unseqToSeq[s->block[j]];
|
||||
j = ptr[i]-1; if (j < 0) j += s->nblock;
|
||||
ll_i = s->unseqToSeq[block[j] >> 8];
|
||||
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
|
||||
|
||||
j = 0;
|
||||
tmp = yy[j];
|
||||
while ( ll_i != tmp ) {
|
||||
j++;
|
||||
tmp2 = tmp;
|
||||
tmp = yy[j];
|
||||
yy[j] = tmp2;
|
||||
};
|
||||
yy[0] = tmp;
|
||||
|
||||
if (j == 0) {
|
||||
tmp = yy[0];
|
||||
if (tmp == ll_i) {
|
||||
zPend++;
|
||||
} else {
|
||||
tmp2 = tmp;
|
||||
tmp = yy[1];
|
||||
yy[1] = tmp2;
|
||||
j = 1;
|
||||
while ( ll_i != tmp ) {
|
||||
j++;
|
||||
tmp2 = tmp;
|
||||
tmp = yy[j];
|
||||
yy[j] = tmp2;
|
||||
};
|
||||
yy[0] = tmp;
|
||||
|
||||
if (zPend > 0) {
|
||||
zPend--;
|
||||
while (True) {
|
||||
switch (zPend % 2) {
|
||||
case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
|
||||
case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
|
||||
};
|
||||
if (zPend & 1) {
|
||||
mtfv[wr] = BZ_RUNB; wr++;
|
||||
s->mtfFreq[BZ_RUNB]++;
|
||||
} else {
|
||||
mtfv[wr] = BZ_RUNA; wr++;
|
||||
s->mtfFreq[BZ_RUNA]++;
|
||||
}
|
||||
if (zPend < 2) break;
|
||||
zPend = (zPend - 2) / 2;
|
||||
};
|
||||
zPend = 0;
|
||||
}
|
||||
s->szptr[wr] = j+1; wr++; s->mtfFreq[j+1]++;
|
||||
mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
|
||||
}
|
||||
}
|
||||
|
||||
if (zPend > 0) {
|
||||
zPend--;
|
||||
while (True) {
|
||||
switch (zPend % 2) {
|
||||
case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
|
||||
case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
|
||||
};
|
||||
if (zPend & 1) {
|
||||
mtfv[wr] = BZ_RUNB; wr++;
|
||||
s->mtfFreq[BZ_RUNB]++;
|
||||
} else {
|
||||
mtfv[wr] = BZ_RUNA; wr++;
|
||||
s->mtfFreq[BZ_RUNA]++;
|
||||
}
|
||||
if (zPend < 2) break;
|
||||
zPend = (zPend - 2) / 2;
|
||||
};
|
||||
}
|
||||
|
||||
s->szptr[wr] = EOB; wr++; s->mtfFreq[EOB]++;
|
||||
mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
|
||||
|
||||
s->nMTF = wr;
|
||||
}
|
||||
@ -259,6 +294,8 @@ void sendMTFValues ( EState* s )
|
||||
UInt16 cost[BZ_N_GROUPS];
|
||||
Int32 fave[BZ_N_GROUPS];
|
||||
|
||||
UInt16* mtfv = s->mtfv;
|
||||
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
|
||||
"%d+2 syms in use\n",
|
||||
@ -348,7 +385,7 @@ void sendMTFValues ( EState* s )
|
||||
register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
|
||||
cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
|
||||
for (i = gs; i <= ge; i++) {
|
||||
UInt16 icv = s->szptr[i];
|
||||
UInt16 icv = mtfv[i];
|
||||
cost0 += s->len[0][icv];
|
||||
cost1 += s->len[1][icv];
|
||||
cost2 += s->len[2][icv];
|
||||
@ -360,7 +397,7 @@ void sendMTFValues ( EState* s )
|
||||
cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
|
||||
} else {
|
||||
for (i = gs; i <= ge; i++) {
|
||||
UInt16 icv = s->szptr[i];
|
||||
UInt16 icv = mtfv[i];
|
||||
for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
|
||||
}
|
||||
}
|
||||
@ -381,7 +418,7 @@ void sendMTFValues ( EState* s )
|
||||
Increment the symbol frequencies for the selected table.
|
||||
--*/
|
||||
for (i = gs; i <= ge; i++)
|
||||
s->rfreq[bt][ s->szptr[i] ]++;
|
||||
s->rfreq[bt][ mtfv[i] ]++;
|
||||
|
||||
gs = ge+1;
|
||||
}
|
||||
@ -502,8 +539,8 @@ void sendMTFValues ( EState* s )
|
||||
for (i = gs; i <= ge; i++) {
|
||||
AssertH ( s->selector[selCtr] < nGroups, 3006 );
|
||||
bsW ( s,
|
||||
s->len [s->selector[selCtr]] [s->szptr[i]],
|
||||
s->code [s->selector[selCtr]] [s->szptr[i]] );
|
||||
s->len [s->selector[selCtr]] [mtfv[i]],
|
||||
s->code [s->selector[selCtr]] [mtfv[i]] );
|
||||
}
|
||||
|
||||
gs = ge+1;
|
||||
@ -534,13 +571,15 @@ void compressBlock ( EState* s, Bool is_last_block )
|
||||
blockSort ( s );
|
||||
}
|
||||
|
||||
s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]);
|
||||
|
||||
/*-- If this is the first block, create the stream header. --*/
|
||||
if (s->blockNo == 1) {
|
||||
bsInitWrite ( s );
|
||||
bsPutUChar ( s, 'B' );
|
||||
bsPutUChar ( s, 'Z' );
|
||||
bsPutUChar ( s, 'h' );
|
||||
bsPutUChar ( s, '0' + s->blockSize100k );
|
||||
bsPutUChar ( s, (UChar)('0' + s->blockSize100k) );
|
||||
}
|
||||
|
||||
if (s->nblock > 0) {
|
||||
@ -552,11 +591,16 @@ void compressBlock ( EState* s, Bool is_last_block )
|
||||
/*-- Now the block's CRC, so it is in a known place. --*/
|
||||
bsPutUInt32 ( s, s->blockCRC );
|
||||
|
||||
/*-- Now a single bit indicating randomisation. --*/
|
||||
if (s->blockRandomised) {
|
||||
bsW(s,1,1); s->nBlocksRandomised++;
|
||||
} else
|
||||
bsW(s,1,0);
|
||||
/*--
|
||||
Now a single bit indicating (non-)randomisation.
|
||||
As of version 0.9.5, we use a better sorting algorithm
|
||||
which makes randomisation unnecessary. So always set
|
||||
the randomised bit to 'no'. Of course, the decoder
|
||||
still needs to be able to handle randomised blocks
|
||||
so as to maintain backwards compatibility with
|
||||
older versions of bzip2.
|
||||
--*/
|
||||
bsW(s,1,0);
|
||||
|
||||
bsW ( s, 24, s->origPtr );
|
||||
generateMTFValues ( s );
|
||||
@ -567,11 +611,6 @@ void compressBlock ( EState* s, Bool is_last_block )
|
||||
/*-- If this is the last block, add the stream trailer. --*/
|
||||
if (is_last_block) {
|
||||
|
||||
if (s->verbosity >= 2 && s->nBlocksRandomised > 0)
|
||||
VPrintf2 ( " %d block%s needed randomisation\n",
|
||||
s->nBlocksRandomised,
|
||||
s->nBlocksRandomised == 1 ? "" : "s" );
|
||||
|
||||
bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
|
||||
bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
|
||||
bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
|
||||
|
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
|
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
@ -141,7 +141,7 @@ Int32 decompress ( DState* s )
|
||||
bz_stream* strm = s->strm;
|
||||
|
||||
/* stuff that needs to be saved/restored */
|
||||
Int32 i ;
|
||||
Int32 i;
|
||||
Int32 j;
|
||||
Int32 t;
|
||||
Int32 alphaSize;
|
||||
|
242
dlltest.c
242
dlltest.c
@ -1,17 +1,17 @@
|
||||
/*
|
||||
minibz2
|
||||
libbz2.dll test program.
|
||||
by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
|
||||
This file is Public Domain.
|
||||
welcome any email to me.
|
||||
minibz2
|
||||
libbz2.dll test program.
|
||||
by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
|
||||
This file is Public Domain.
|
||||
welcome any email to me.
|
||||
|
||||
usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
|
||||
usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
|
||||
*/
|
||||
|
||||
#define BZ_IMPORT
|
||||
#include "bzlib.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "bzlib.h"
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#endif
|
||||
@ -24,140 +24,142 @@ static int BZ2DLLLoaded = 0;
|
||||
static HINSTANCE BZ2DLLhLib;
|
||||
int BZ2DLLLoadLibrary(void)
|
||||
{
|
||||
HINSTANCE hLib;
|
||||
HINSTANCE hLib;
|
||||
|
||||
if(BZ2DLLLoaded==1){return 0;}
|
||||
hLib=LoadLibrary("libbz2.dll");
|
||||
if(hLib == NULL){
|
||||
puts("Can't load libbz2.dll");
|
||||
return -1;
|
||||
}
|
||||
BZ2DLLLoaded=1;
|
||||
BZ2DLLhLib=hLib;
|
||||
bzlibVersion=GetProcAddress(hLib,"bzlibVersion");
|
||||
bzopen=GetProcAddress(hLib,"bzopen");
|
||||
bzdopen=GetProcAddress(hLib,"bzdopen");
|
||||
bzread=GetProcAddress(hLib,"bzread");
|
||||
bzwrite=GetProcAddress(hLib,"bzwrite");
|
||||
bzflush=GetProcAddress(hLib,"bzflush");
|
||||
bzclose=GetProcAddress(hLib,"bzclose");
|
||||
bzerror=GetProcAddress(hLib,"bzerror");
|
||||
return 0;
|
||||
if(BZ2DLLLoaded==1){return 0;}
|
||||
hLib=LoadLibrary("libbz2.dll");
|
||||
if(hLib == NULL){
|
||||
puts("Can't load libbz2.dll");
|
||||
return -1;
|
||||
}
|
||||
BZ2DLLLoaded=1;
|
||||
BZ2DLLhLib=hLib;
|
||||
bzlibVersion=GetProcAddress(hLib,"bzlibVersion");
|
||||
bzopen=GetProcAddress(hLib,"bzopen");
|
||||
bzdopen=GetProcAddress(hLib,"bzdopen");
|
||||
bzread=GetProcAddress(hLib,"bzread");
|
||||
bzwrite=GetProcAddress(hLib,"bzwrite");
|
||||
bzflush=GetProcAddress(hLib,"bzflush");
|
||||
bzclose=GetProcAddress(hLib,"bzclose");
|
||||
bzerror=GetProcAddress(hLib,"bzerror");
|
||||
return 0;
|
||||
|
||||
}
|
||||
int BZ2DLLFreeLibrary(void)
|
||||
{
|
||||
if(BZ2DLLLoaded==0){return 0;}
|
||||
FreeLibrary(BZ2DLLhLib);
|
||||
BZ2DLLLoaded=0;
|
||||
if(BZ2DLLLoaded==0){return 0;}
|
||||
FreeLibrary(BZ2DLLhLib);
|
||||
BZ2DLLLoaded=0;
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
void usage(void)
|
||||
{
|
||||
puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]");
|
||||
puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]");
|
||||
}
|
||||
|
||||
void main(int argc,char *argv[])
|
||||
int main(int argc,char *argv[])
|
||||
{
|
||||
int decompress = 0;
|
||||
int level = 9;
|
||||
char *fn_r,*fn_w;
|
||||
int decompress = 0;
|
||||
int level = 9;
|
||||
char *fn_r = NULL;
|
||||
char *fn_w = NULL;
|
||||
|
||||
#ifdef _WIN32
|
||||
if(BZ2DLLLoadLibrary()<0){
|
||||
puts("can't load dll");
|
||||
exit(1);
|
||||
}
|
||||
if(BZ2DLLLoadLibrary()<0){
|
||||
puts("can't load dll");
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
while(++argv,--argc){
|
||||
if(**argv =='-' || **argv=='/'){
|
||||
char *p;
|
||||
while(++argv,--argc){
|
||||
if(**argv =='-' || **argv=='/'){
|
||||
char *p;
|
||||
|
||||
for(p=*argv+1;*p;p++){
|
||||
if(*p=='d'){
|
||||
decompress = 1;
|
||||
}else if('1'<=*p && *p<='9'){
|
||||
level = *p - '0';
|
||||
}else{
|
||||
usage();
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(argc>=1){
|
||||
fn_r = *argv;
|
||||
argc--;argv++;
|
||||
}else{
|
||||
fn_r = NULL;
|
||||
}
|
||||
if(argc>=1){
|
||||
fn_w = *argv;
|
||||
argc--;argv++;
|
||||
}else{
|
||||
fn_w = NULL;
|
||||
}
|
||||
{
|
||||
int len;
|
||||
char buff[0x1000];
|
||||
char mode[10];
|
||||
for(p=*argv+1;*p;p++){
|
||||
if(*p=='d'){
|
||||
decompress = 1;
|
||||
}else if('1'<=*p && *p<='9'){
|
||||
level = *p - '0';
|
||||
}else{
|
||||
usage();
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(argc>=1){
|
||||
fn_r = *argv;
|
||||
argc--;argv++;
|
||||
}else{
|
||||
fn_r = NULL;
|
||||
}
|
||||
if(argc>=1){
|
||||
fn_w = *argv;
|
||||
argc--;argv++;
|
||||
}else{
|
||||
fn_w = NULL;
|
||||
}
|
||||
{
|
||||
int len;
|
||||
char buff[0x1000];
|
||||
char mode[10];
|
||||
|
||||
if(decompress){
|
||||
BZFILE *BZ2fp_r;
|
||||
FILE *fp_w;
|
||||
if(decompress){
|
||||
BZFILE *BZ2fp_r = NULL;
|
||||
FILE *fp_w = NULL;
|
||||
|
||||
if(fn_w){
|
||||
if((fp_w = fopen(fn_w,"wb"))==NULL){
|
||||
printf("can't open [%s]\n",fn_w);
|
||||
perror("reason:");
|
||||
exit(1);
|
||||
}
|
||||
}else{
|
||||
fp_w = stdout;
|
||||
}
|
||||
if((BZ2fp_r == NULL && (BZ2fp_r = bzdopen(fileno(stdin),"rb"))==NULL)
|
||||
|| (BZ2fp_r != NULL && (BZ2fp_r = bzopen(fn_r,"rb"))==NULL)){
|
||||
printf("can't bz2openstream\n");
|
||||
exit(1);
|
||||
}
|
||||
while((len=bzread(BZ2fp_r,buff,0x1000))>0){
|
||||
fwrite(buff,1,len,fp_w);
|
||||
}
|
||||
bzclose(BZ2fp_r);
|
||||
if(fp_w != stdout) fclose(fp_w);
|
||||
}else{
|
||||
BZFILE *BZ2fp_w;
|
||||
FILE *fp_r;
|
||||
if(fn_w){
|
||||
if((fp_w = fopen(fn_w,"wb"))==NULL){
|
||||
printf("can't open [%s]\n",fn_w);
|
||||
perror("reason:");
|
||||
exit(1);
|
||||
}
|
||||
}else{
|
||||
fp_w = stdout;
|
||||
}
|
||||
if((BZ2fp_r == NULL && (BZ2fp_r = bzdopen(fileno(stdin),"rb"))==NULL)
|
||||
|| (BZ2fp_r != NULL && (BZ2fp_r = bzopen(fn_r,"rb"))==NULL)){
|
||||
printf("can't bz2openstream\n");
|
||||
exit(1);
|
||||
}
|
||||
while((len=bzread(BZ2fp_r,buff,0x1000))>0){
|
||||
fwrite(buff,1,len,fp_w);
|
||||
}
|
||||
bzclose(BZ2fp_r);
|
||||
if(fp_w != stdout) fclose(fp_w);
|
||||
}else{
|
||||
BZFILE *BZ2fp_w = NULL;
|
||||
FILE *fp_r = NULL;
|
||||
|
||||
if(fn_r){
|
||||
if((fp_r = fopen(fn_r,"rb"))==NULL){
|
||||
printf("can't open [%s]\n",fn_r);
|
||||
perror("reason:");
|
||||
exit(1);
|
||||
}
|
||||
}else{
|
||||
fp_r = stdin;
|
||||
}
|
||||
mode[0]='w';
|
||||
mode[1] = '0' + level;
|
||||
mode[2] = '\0';
|
||||
if(fn_r){
|
||||
if((fp_r = fopen(fn_r,"rb"))==NULL){
|
||||
printf("can't open [%s]\n",fn_r);
|
||||
perror("reason:");
|
||||
exit(1);
|
||||
}
|
||||
}else{
|
||||
fp_r = stdin;
|
||||
}
|
||||
mode[0]='w';
|
||||
mode[1] = '0' + level;
|
||||
mode[2] = '\0';
|
||||
|
||||
if((fn_w == NULL && (BZ2fp_w = bzdopen(fileno(stdout),mode))==NULL)
|
||||
|| (fn_w !=NULL && (BZ2fp_w = bzopen(fn_w,mode))==NULL)){
|
||||
printf("can't bz2openstream\n");
|
||||
exit(1);
|
||||
}
|
||||
while((len=fread(buff,1,0x1000,fp_r))>0){
|
||||
bzwrite(BZ2fp_w,buff,len);
|
||||
}
|
||||
bzclose(BZ2fp_w);
|
||||
if(fp_r!=stdin)fclose(fp_r);
|
||||
}
|
||||
}
|
||||
if((fn_w == NULL && (BZ2fp_w = bzdopen(fileno(stdout),mode))==NULL)
|
||||
|| (fn_w !=NULL && (BZ2fp_w = bzopen(fn_w,mode))==NULL)){
|
||||
printf("can't bz2openstream\n");
|
||||
exit(1);
|
||||
}
|
||||
while((len=fread(buff,1,0x1000,fp_r))>0){
|
||||
bzwrite(BZ2fp_w,buff,len);
|
||||
}
|
||||
bzclose(BZ2fp_w);
|
||||
if(fp_r!=stdin)fclose(fp_r);
|
||||
}
|
||||
}
|
||||
#ifdef _WIN32
|
||||
BZ2DLLFreeLibrary();
|
||||
BZ2DLLFreeLibrary();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
37
howbig.c
37
howbig.c
@ -1,37 +0,0 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "bzlib.h"
|
||||
|
||||
unsigned char ibuff[1000000];
|
||||
unsigned char obuff[1000000];
|
||||
|
||||
void doone ( int n )
|
||||
{
|
||||
int i, j, k, q, nobuff;
|
||||
q = 0;
|
||||
|
||||
for (k = 0; k < 1; k++) {
|
||||
for (i = 0; i < n; i++)
|
||||
ibuff[i] = ((unsigned long)(random())) & 0xff;
|
||||
nobuff = 1000000;
|
||||
j = bzBuffToBuffCompress ( obuff, &nobuff, ibuff, n, 9,0,0 );
|
||||
assert (j == BZ_OK);
|
||||
if (nobuff > q) q = nobuff;
|
||||
}
|
||||
printf ( "%d %d(%d)\n", n, q, (int)((float)n * 1.01 - (float)q) );
|
||||
}
|
||||
|
||||
/* Call doone() for a sequence of sizes starting at 0 and stopping
   once the size reaches 900000.  Each step grows the size by 10%
   (truncated to int); while that truncation would not increase the
   value, the size is bumped by one instead. */
int main ( int argc, char** argv )
{
   int size;
   int grown;

   for (size = 0; size < 900000; ) {
      doone(size);
      grown = (int)(1.10 * size);
      /* For small sizes the truncated 10% step equals size itself,
         so fall back to a plain increment to keep making progress. */
      size = (grown > size) ? grown : size + 1;
   }

   return 0;
}
|
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
|
65
makefile.msc
Normal file
65
makefile.msc
Normal file
@ -0,0 +1,65 @@
|
||||
# Makefile for Microsoft Visual C++ 6.0
|
||||
# usage: nmake -f makefile.msc
|
||||
# K.M. Syring (syring@gsf.de)
|
||||
# Fixed up by JRS for bzip2-0.9.5d release.
|
||||
|
||||
# Compiler driver and the switches passed on every compile and link
# command in this makefile.
CC=cl
CFLAGS= -DWIN32 -MD -Ox

# Object files that the "lib" target archives into libbz2.lib.
OBJS= blocksort.obj huffman.obj crctable.obj randtable.obj \
      compress.obj decompress.obj bzlib.obj
|
||||
|
||||
# Aggregate target: build the library, then the executables, then run
# the "test" target.
all: lib bzip2 test
|
||||
|
||||
# Build the two executables once the library target has been made.
# bzip2 is compiled from bzip2.c and linked with libbz2.lib and
# setargv.obj; bzip2recover is compiled from bzip2recover.c alone.
bzip2: lib
	$(CC) $(CFLAGS) -o bzip2 bzip2.c libbz2.lib setargv.obj
	$(CC) $(CFLAGS) -o bzip2recover bzip2recover.c
|
||||
|
||||
# Archive $(OBJS) into the static library libbz2.lib.
# A library left over from an earlier build is removed first.  The
# "if exist" guard means a fresh tree (no libbz2.lib yet) does not run
# del against a missing file, which otherwise produces a spurious
# "could not find" complaint before the real work starts.
lib: $(OBJS)
	if exist libbz2.lib del libbz2.lib
	lib /out:libbz2.lib $(OBJS)
|
||||
|
||||
# Round-trip self test, run after the executables are built.
# It prints the words1 banner, compresses the three .ref files,
# decompresses the three shipped .bz2 files, compares all six results
# with fc, and finally echoes advice on interpreting any differences.
test: bzip2
	type words1
# Compress each reference file (flags -1, -2, -3) into a .rb2 file.
	.\\bzip2 -1 < sample1.ref > sample1.rb2
	.\\bzip2 -2 < sample2.ref > sample2.rb2
	.\\bzip2 -3 < sample3.ref > sample3.rb2
# Decompress each shipped .bz2 file into a .tst file (sample3 uses -ds).
	.\\bzip2 -d < sample1.bz2 > sample1.tst
	.\\bzip2 -d < sample2.bz2 > sample2.tst
	.\\bzip2 -ds < sample3.bz2 > sample3.tst
# Fresh compressed output against the shipped .bz2 files.
	fc sample1.bz2 sample1.rb2
	fc sample2.bz2 sample2.rb2
	fc sample3.bz2 sample3.rb2
# Fresh decompressed output against the reference files.
	fc sample1.tst sample1.ref
	fc sample2.tst sample2.ref
	fc sample3.tst sample3.ref
	@echo All six of the fc's should find no differences.
	@echo If fc finds an error on sample3.tst, this could be
	@echo because WinZips 'TAR file smart CR/LF conversion'
	@echo is too clever for its own good. Disable this option.
	@echo The correct size for sample3.ref is 120,244. If it
	@echo is around 150k, WinZip has stuffed it up.
	@echo Also remember to set BZ_UNIX to 0 and BZ_LCCWIN32
	@echo to 1 in bzip2.c.
|
||||
|
||||
|
||||
# Remove everything this makefile generates: object files, the
# library, both executables, and the .rb2 / .tst files written by the
# "test" target.
clean:
# Build products.
	del *.obj
	del libbz2.lib
	del bzip2.exe
	del bzip2recover.exe
# Output of the compression half of the self test.
	del sample1.rb2
	del sample2.rb2
	del sample3.rb2
# Output of the decompression half of the self test.
	del sample1.tst
	del sample2.tst
	del sample3.tst
|
||||
|
||||
# Inference rule: compile any .c file into the .obj of the same base
# name.  $< is the source file that triggered the rule and $@ is the
# object file being built.
.c.obj:
	$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
867
manual.texi
867
manual.texi
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,7 @@
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
||||
Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,9 +41,9 @@
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Guildford, Surrey, UK.
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 0.9.0c of 18 October 1998
|
||||
bzip2/libbzip2 version 0.9.5 of 24 May 1999
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
|
BIN
sample3.bz2
Normal file
BIN
sample3.bz2
Normal file
Binary file not shown.
30007
sample3.ref
Normal file
30007
sample3.ref
Normal file
File diff suppressed because it is too large
Load Diff
2
words1
2
words1
@ -1,4 +1,4 @@
|
||||
|
||||
Doing 4 tests (2 compress, 2 uncompress) ...
|
||||
Doing 6 tests (3 compress, 3 uncompress) ...
|
||||
If there's a problem, things might stop at this point.
|
||||
|
||||
|
21
words3
21
words3
@ -1,12 +1,17 @@
|
||||
|
||||
If you got this far and the "cmp"s didn't find anything amiss, looks
|
||||
like you're in business. You should install bzip2, bunzip2 and bzcat:
|
||||
If you got this far and the "cmp"s didn't complain, it looks
|
||||
like you're in business.
|
||||
|
||||
Copy bzip2 and bzip2recover to a public place, maybe /usr/bin.
|
||||
In that public place, make bunzip2 and bzcat be
|
||||
symbolic links to the bzip2 you just copied there.
|
||||
Put the manual page, bzip2.1, somewhere appropriate;
|
||||
perhaps in /usr/man/man1.
|
||||
To install in /usr/bin, /usr/lib, /usr/man and /usr/include, type
|
||||
make install
|
||||
To install somewhere else, eg, /xxx/yyy/{bin,lib,man,include}, type
|
||||
make install PREFIX=/xxx/yyy
|
||||
If you are (justifiably) paranoid and want to see what 'make install'
|
||||
is going to do, you can first do
|
||||
make -n install or
|
||||
make -n install PREFIX=/xxx/yyy respectively.
|
||||
The -n instructs make to show the commands it would execute, but
|
||||
not actually execute them.
|
||||
|
||||
Instructions for use are in the preformatted manual page, in the file
|
||||
bzip2.txt. For more detailed documentation, read the full manual.
|
||||
@ -16,5 +21,3 @@ It is available in Postscript form (manual.ps) and HTML form
|
||||
You can also do "bzip2 --help" to see some helpful information.
|
||||
"bzip2 -L" displays the software license.
|
||||
|
||||
Happy compressing. -- JRS, 30 August 1998.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user