bzip2-1.0.1

This commit is contained in:
Julian Seward 2000-06-24 22:13:13 +02:00
parent f93cd82a9a
commit 795b859eee
27 changed files with 2164 additions and 919 deletions

67
CHANGES
View File

@ -98,3 +98,70 @@ functioning of the bzip2 program or library. Added a couple of casts
so the library compiles without warnings at level 3 in MS Visual so the library compiles without warnings at level 3 in MS Visual
Studio 6.0. Included a Y2K statement in the file Y2K_INFO. All other Studio 6.0. Included a Y2K statement in the file Y2K_INFO. All other
changes are minor documentation changes. changes are minor documentation changes.
1.0
~~~
Several minor bugfixes and enhancements:
* Large file support. The library uses 64-bit counters to
count the volume of data passing through it. bzip2.c
is now compiled with -D_FILE_OFFSET_BITS=64 to get large
file support from the C library. -v correctly prints out
file sizes greater than 4 gigabytes. All these changes have
been made without assuming a 64-bit platform or a C compiler
which supports 64-bit ints, so, except for the C library
aspect, they are fully portable.
* Decompression robustness. The library/program should be
robust to any corruption of compressed data, detecting and
handling _all_ corruption, instead of merely relying on
the CRCs. What this means is that the program should
never crash, given corrupted data, and the library should
always return BZ_DATA_ERROR.
* Fixed an obscure race-condition bug only ever observed on
Solaris, in which, if you were very unlucky and issued
control-C at exactly the wrong time, both input and output
files would be deleted.
* Don't run out of file handles on test/decompression when
large numbers of files have invalid magic numbers.
* Avoid library namespace pollution. Prefix all exported
symbols with BZ2_.
* Minor sorting enhancements from my DCC2000 paper.
* Advance the version number to 1.0, so as to counteract the
(false-in-this-case) impression some people have that programs
with version numbers less than 1.0 are in some way experimental,
pre-release versions.
* Create an initial Makefile-libbz2_so to build a shared library.
Yes, I know I should really use libtool et al ...
* Make the program exit with 2 instead of 0 when decompression
fails due to a bad magic number (ie, an invalid bzip2 header).
Also exit with 1 (as the manual claims :-) whenever a diagnostic
message would have been printed AND the corresponding operation
is aborted, for example
bzip2: Output file xx already exists.
When a diagnostic message is printed but the operation is not
aborted, for example
bzip2: Can't guess original name for wurble -- using wurble.out
then the exit value 0 is returned, unless some other problem is
also detected.
I think it corresponds more closely to what the manual claims now.
1.0.1
~~~~~
* Modified dlltest.c so it uses the new BZ2_ naming scheme.
* Modified makefile.msc to fix minor build problems on Win2k.
* Updated README.COMPILATION.PROBLEMS.
There are no functionality changes or bug fixes relative to version
1.0.0. This is just a documentation update + a fix for minor Win32
build problems. For almost everyone, upgrading from 1.0.0 to 1.0.1 is
utterly pointless. Don't bother.

View File

@ -1,6 +1,6 @@
This program, "bzip2" and associated library "libbzip2", are This program, "bzip2" and associated library "libbzip2", are
copyright (C) 1996-1999 Julian R Seward. All rights reserved. copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -35,5 +35,5 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000

View File

@ -1,7 +1,8 @@
SHELL=/bin/sh SHELL=/bin/sh
CC=gcc CC=gcc
CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce BIGFILES=-D_FILE_OFFSET_BITS=64
CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
OBJS= blocksort.o \ OBJS= blocksort.o \
huffman.o \ huffman.o \
@ -73,6 +74,7 @@ clean:
sample1.tst sample2.tst sample3.tst sample1.tst sample2.tst sample3.tst
blocksort.o: blocksort.c blocksort.o: blocksort.c
@cat words0
$(CC) $(CFLAGS) -c blocksort.c $(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c $(CC) $(CFLAGS) -c huffman.c
@ -91,13 +93,49 @@ bzip2.o: bzip2.c
bzip2recover.o: bzip2recover.c bzip2recover.o: bzip2recover.c
$(CC) $(CFLAGS) -c bzip2recover.c $(CC) $(CFLAGS) -c bzip2recover.c
DISTNAME=bzip2-1.0.1
tarfile: tarfile:
tar cvf interim.tar blocksort.c huffman.c crctable.c \ rm -f $(DISTNAME)
randtable.c compress.c decompress.c bzlib.c bzip2.c \ ln -sf . $(DISTNAME)
bzip2recover.c bzlib.h bzlib_private.h Makefile manual.texi \ tar cvf $(DISTNAME).tar \
manual.ps LICENSE bzip2.1 bzip2.1.preformatted bzip2.txt \ $(DISTNAME)/blocksort.c \
words1 words2 words3 sample1.ref sample2.ref sample3.ref \ $(DISTNAME)/huffman.c \
sample1.bz2 sample2.bz2 sample3.bz2 dlltest.c \ $(DISTNAME)/crctable.c \
*.html README CHANGES libbz2.def libbz2.dsp \ $(DISTNAME)/randtable.c \
dlltest.dsp makefile.msc Y2K_INFO $(DISTNAME)/compress.c \
$(DISTNAME)/decompress.c \
$(DISTNAME)/bzlib.c \
$(DISTNAME)/bzip2.c \
$(DISTNAME)/bzip2recover.c \
$(DISTNAME)/bzlib.h \
$(DISTNAME)/bzlib_private.h \
$(DISTNAME)/Makefile \
$(DISTNAME)/manual.texi \
$(DISTNAME)/manual.ps \
$(DISTNAME)/LICENSE \
$(DISTNAME)/bzip2.1 \
$(DISTNAME)/bzip2.1.preformatted \
$(DISTNAME)/bzip2.txt \
$(DISTNAME)/words0 \
$(DISTNAME)/words1 \
$(DISTNAME)/words2 \
$(DISTNAME)/words3 \
$(DISTNAME)/sample1.ref \
$(DISTNAME)/sample2.ref \
$(DISTNAME)/sample3.ref \
$(DISTNAME)/sample1.bz2 \
$(DISTNAME)/sample2.bz2 \
$(DISTNAME)/sample3.bz2 \
$(DISTNAME)/dlltest.c \
$(DISTNAME)/*.html \
$(DISTNAME)/README \
$(DISTNAME)/README.COMPILATION.PROBLEMS \
$(DISTNAME)/CHANGES \
$(DISTNAME)/libbz2.def \
$(DISTNAME)/libbz2.dsp \
$(DISTNAME)/dlltest.dsp \
$(DISTNAME)/makefile.msc \
$(DISTNAME)/Y2K_INFO \
$(DISTNAME)/unzcrash.c \
$(DISTNAME)/spewG.c \
$(DISTNAME)/Makefile-libbz2_so

43
Makefile-libbz2_so Normal file
View File

@ -0,0 +1,43 @@
# This Makefile builds a shared version of the library,
# libbz2.so.1.0.1, with soname libbz2.so.1.0,
# at least on x86-Linux (RedHat 5.2),
# with gcc-2.7.2.3. Please see the README file for some
# important info about building the library like this.
SHELL=/bin/sh
CC=gcc
# Compile with 64-bit file offsets to get large file support from the
# C library.
BIGFILES=-D_FILE_OFFSET_BITS=64
# -fpic/-fPIC request position-independent code for the shared library.
# If gcc objects to the combination, keep just -fpic.
CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
# Object files that are linked into the shared library.
OBJS= blocksort.o \
huffman.o \
crctable.o \
randtable.o \
compress.o \
decompress.o \
bzlib.o
# Default target: link $(OBJS) into libbz2.so.1.0.1 with soname
# libbz2.so.1.0, build bzip2-shared from bzip2.c against that library,
# then recreate the libbz2.so.1.0 symlink pointing at the new library.
all: $(OBJS)
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS)
$(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.1
rm -f libbz2.so.1.0
ln -s libbz2.so.1.0.1 libbz2.so.1.0
# Remove the object files, the shared library, its symlink and
# the bzip2-shared binary.
clean:
rm -f $(OBJS) bzip2.o libbz2.so.1.0.1 libbz2.so.1.0 bzip2-shared
# One explicit compile rule per library object, all using $(CFLAGS).
blocksort.o: blocksort.c
$(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c
crctable.o: crctable.c
$(CC) $(CFLAGS) -c crctable.c
randtable.o: randtable.c
$(CC) $(CFLAGS) -c randtable.c
compress.o: compress.c
$(CC) $(CFLAGS) -c compress.c
decompress.o: decompress.c
$(CC) $(CFLAGS) -c decompress.c
bzlib.o: bzlib.c
$(CC) $(CFLAGS) -c bzlib.c

43
README
View File

@ -1,9 +1,9 @@
This is the README for bzip2, a block-sorting file compressor, version This is the README for bzip2, a block-sorting file compressor, version
0.9.5d. This version is fully compatible with the previous public 1.0. This version is fully compatible with the previous public
releases, bzip2-0.1pl2 and bzip2-0.9.0. releases, bzip2-0.1pl2, bzip2-0.9.0 and bzip2-0.9.5.
bzip2-0.9.5 is distributed under a BSD-style license. For details, bzip2-1.0 is distributed under a BSD-style license. For details,
see the file LICENSE. see the file LICENSE.
Complete documentation is available in Postscript form (manual.ps) or Complete documentation is available in Postscript form (manual.ps) or
@ -30,15 +30,37 @@ The -n instructs make to show the commands it would execute, but
not actually execute them. not actually execute them.
HOW TO BUILD -- UNIX, shared library libbz2.so.
Do 'make -f Makefile-libbz2_so'. This Makefile seems to work for
Linux-ELF (RedHat 5.2 on an x86 box), with gcc. I make no claims
that it works for any other platform, though I suspect it probably
will work for most platforms employing both ELF and gcc.
bzip2-shared, a client of the shared library, is also built, but
not self-tested. So I suggest you also build using the normal
Makefile, since that conducts a self-test.
Important note for people upgrading .so's from 0.9.0/0.9.5 to
version 1.0. All the functions in the library have been renamed,
from (eg) bzCompress to BZ2_bzCompress, to avoid namespace pollution.
Unfortunately this means that the libbz2.so created by
Makefile-libbz2_so will not work with any program which used an
older version of the library. Sorry. I do encourage library
clients to make the effort to upgrade to use version 1.0, since
it is both faster and more robust than previous versions.
HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc. HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc.
It's difficult for me to support compilation on all these platforms. It's difficult for me to support compilation on all these platforms.
My approach is to collect binaries for these platforms, and put them My approach is to collect binaries for these platforms, and put them
on my web page (http://www.muraroa.demon.co.uk). Look there. However on the master web page (http://sourceware.cygnus.com/bzip2). Look
(FWIW), bzip2-0.9.5 is very standard ANSI C and should compile there. However (FWIW), bzip2-1.0 is very standard ANSI C and should
unmodified with MS Visual C. For Win32, there is one important compile unmodified with MS Visual C. For Win32, there is one
caveat: in bzip2.c, you must set BZ_UNIX to 0 and BZ_LCCWIN32 to 1 important caveat: in bzip2.c, you must set BZ_UNIX to 0 and
before building. BZ_LCCWIN32 to 1 before building. If you have difficulties building,
you might want to read README.COMPILATION.PROBLEMS.
VALIDATION VALIDATION
@ -116,6 +138,10 @@ WHAT'S NEW IN 0.9.5 ?
* Many small improvements in file and flag handling. * Many small improvements in file and flag handling.
* A Y2K statement. * A Y2K statement.
WHAT'S NEW IN 1.0
See the CHANGES file.
I hope you find bzip2 useful. Feel free to contact me at I hope you find bzip2 useful. Feel free to contact me at
jseward@acm.org jseward@acm.org
if you have any suggestions or queries. Many people mailed me with if you have any suggestions or queries. Many people mailed me with
@ -137,3 +163,4 @@ Cambridge, UK
23 August 1998 (bzip2, version 0.9.0) 23 August 1998 (bzip2, version 0.9.0)
8 June 1999 (bzip2, version 0.9.5) 8 June 1999 (bzip2, version 0.9.5)
4 Sept 1999 (bzip2, version 0.9.5d) 4 Sept 1999 (bzip2, version 0.9.5d)
5 May 2000 (bzip2, version 1.0pre8)

130
README.COMPILATION.PROBLEMS Normal file
View File

@ -0,0 +1,130 @@
bzip2-1.0 should compile without problems on the vast majority of
platforms. Using the supplied Makefile, I've built and tested it
myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and
alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can
build a native Win32 version too. Large file support seems to work
correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows
2000).
When I say "large file" I mean a file of size 2,147,483,648 (2^31)
bytes or above. Many older OSs can't handle files above this size,
but many newer ones can. Large files are pretty huge -- most files
you'll encounter are not Large Files.
Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide
variety of platforms without difficulty, and I hope this version will
continue in that tradition. However, in order to support large files,
I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile.
This can cause problems.
The technique of adding -D_FILE_OFFSET_BITS=64 to get large file
support is, as far as I know, the Recommended Way to get correct large
file support. For more details, see the Large File Support
Specification, published by the Large File Summit, at
http://www.sas.com/standard/large.file/
As a general comment, if you get compilation errors which you think
are related to large file support, try removing the above define from
the Makefile, ie, delete the line
BIGFILES=-D_FILE_OFFSET_BITS=64
from the Makefile, and do 'make clean ; make'. This will give you a
version of bzip2 without large file support, which, for most
applications, is probably not a problem.
Alternatively, try some of the platform-specific hints listed below.
You can use the spewG.c program to generate huge files to test bzip2's
large file support, if you are feeling paranoid. Be aware though that
any compilation problems which affect bzip2 will also affect spewG.c,
alas.
Known problems as of 1.0pre8:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large
number of warnings appear, including the following:
/usr/include/sys/resource.h: In function `getrlimit':
/usr/include/sys/resource.h:168:
warning: implicit declaration of function `__getrlimit64'
/usr/include/sys/resource.h: In function `setrlimit':
/usr/include/sys/resource.h:170:
warning: implicit declaration of function `__setrlimit64'
This would appear to be a problem with large file support, header
files and gcc. gcc may or may not give up at this point. If it
fails, you might be able to improve matters by adding
-D__STDC_EXT__=1
to the BIGFILES variable in the Makefile, ie, change its definition
to
BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1
Even if gcc does produce a binary which appears to work (ie passes
its self-tests), you might want to test it to see if it works properly
on large files.
* HP/UX 10.20 and 11.00, using HP's cc compiler.
No specific problems for this combination, except that you'll need to
specify the -Ae flag, and zap the gcc-specific stuff
-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce.
You should retain -D_FILE_OFFSET_BITS=64 in order to get large
file support -- which is reported to work ok for this HP/UX + cc
combination.
* SunOS 4.1.X.
Amazingly, there are still people out there using this venerable old
banger. I shouldn't be too rude -- I started life on SunOS, and
it was a pretty darn good OS, way back then. Anyway:
SunOS doesn't seem to have strerror(), so you'll have to use
perror(), perhaps by adding this (warning: UNTESTED CODE):
char* strerror ( int errnum )
{
if (errnum < 0 || errnum >= sys_nerr)
return "Unknown error";
else
return sys_errlist[errnum];
}
Or you could comment out the relevant calls to strerror; they're
not mission-critical. Or you could upgrade to Solaris. Ha ha ha!
(what?? you think I've got Bad Attitude?)
* Making a shared library on Solaris. (Not really a compilation
problem, but many people ask ...)
Firstly, if you have Solaris 8, either you have libbz2.so already
on your system, or you can install it from the Solaris CD.
Secondly, be aware that there are potential naming conflicts
between the .so file supplied with Solaris 8, and the .so file
which Makefile-libbz2_so will make. Makefile-libbz2_so creates
a .so which has the names which I intend to be "official" as
of version 1.0.0 and onwards. Unfortunately, the .so in
Solaris 8 appeared before I decided on the final names, so
the two libraries are incompatible. We have since communicated
and I hope that the problems will have been solved in the next
version of Solaris, whenever that might appear.
All that said: you might be able to get somewhere
by finding the line in Makefile-libbz2_so which says
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS)
and replacing with
$(CC) -G -shared -o libbz2.so.1.0.1 -h libbz2.so.1.0 $(OBJS)
If gcc objects to the combination -fpic -fPIC, get rid of
the second one, leaving just "-fpic".
That's the end of the currently known compilation problems.

View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -56,6 +56,13 @@
Jon L. Bentley Jon L. Bentley
For more information on these sources, see the manual. For more information on these sources, see the manual.
To get some idea how the block sorting algorithms in this file
work, read my paper
On the Performance of BWT Sorting Algorithms
in Proceedings of the IEEE Data Compression Conference 2000,
Snowbird, Utah, USA, 27-30 March 2000. The main sort in this
file implements the algorithm called cache in the paper.
--*/ --*/
@ -232,11 +239,11 @@ void fallbackQSort3 ( UInt32* fmap,
/* Pre: /* Pre:
nblock > 0 nblock > 0
eclass exists for [0 .. nblock-1] eclass exists for [0 .. nblock-1]
((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block ((UChar*)eclass) [0 .. nblock-1] holds block
ptr exists for [0 .. nblock-1] ptr exists for [0 .. nblock-1]
Post: Post:
((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block ((UChar*)eclass) [0 .. nblock-1] holds block
All other areas of eclass destroyed All other areas of eclass destroyed
fmap [0 .. nblock-1] holds sorted order fmap [0 .. nblock-1] holds sorted order
bhtab [ 0 .. 2+(nblock/32) ] destroyed bhtab [ 0 .. 2+(nblock/32) ] destroyed
@ -260,7 +267,7 @@ void fallbackSort ( UInt32* fmap,
Int32 H, i, j, k, l, r, cc, cc1; Int32 H, i, j, k, l, r, cc, cc1;
Int32 nNotDone; Int32 nNotDone;
Int32 nBhtab; Int32 nBhtab;
UInt16* eclass16 = (UInt16*)eclass; UChar* eclass8 = (UChar*)eclass;
/*-- /*--
Initial 1-char radix sort to generate Initial 1-char radix sort to generate
@ -269,12 +276,12 @@ void fallbackSort ( UInt32* fmap,
if (verb >= 4) if (verb >= 4)
VPrintf0 ( " bucket sorting ...\n" ); VPrintf0 ( " bucket sorting ...\n" );
for (i = 0; i < 257; i++) ftab[i] = 0; for (i = 0; i < 257; i++) ftab[i] = 0;
for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++; for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
for (i = 0; i < nblock; i++) { for (i = 0; i < nblock; i++) {
j = eclass16[i] >> 8; j = eclass8[i];
k = ftab[j] - 1; k = ftab[j] - 1;
ftab[j] = k; ftab[j] = k;
fmap[k] = i; fmap[k] = i;
@ -354,7 +361,7 @@ void fallbackSort ( UInt32* fmap,
/*-- /*--
Reconstruct the original block in Reconstruct the original block in
eclass16 [0 .. nblock-1] [15:8], since the eclass8 [0 .. nblock-1], since the
previous phase destroyed it. previous phase destroyed it.
--*/ --*/
if (verb >= 4) if (verb >= 4)
@ -363,7 +370,7 @@ void fallbackSort ( UInt32* fmap,
for (i = 0; i < nblock; i++) { for (i = 0; i < nblock; i++) {
while (ftabCopy[j] == 0) j++; while (ftabCopy[j] == 0) j++;
ftabCopy[j]--; ftabCopy[j]--;
eclass16[fmap[i]] = j << 8; eclass8[fmap[i]] = (UChar)j;
} }
AssertH ( j < 256, 1005 ); AssertH ( j < 256, 1005 );
} }
@ -386,67 +393,116 @@ static
__inline__ __inline__
Bool mainGtU ( UInt32 i1, Bool mainGtU ( UInt32 i1,
UInt32 i2, UInt32 i2,
UInt16* block, UChar* block,
UInt16* quadrant, UInt16* quadrant,
UInt32 nblock, UInt32 nblock,
Int32* budget ) Int32* budget )
{ {
Int32 k; Int32 k;
UChar c1, c2;
UInt16 s1, s2; UInt16 s1, s2;
AssertD ( i1 != i2, "mainGtU" ); AssertD ( i1 != i2, "mainGtU" );
/* 1 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
i1 += 2; i2 += 2; i1++; i2++;
/* 2 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
i1 += 2; i2 += 2; i1++; i2++;
/* 3 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
i1 += 2; i2 += 2; i1++; i2++;
/* 4 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
i1 += 2; i2 += 2; i1++; i2++;
/* 5 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
i1 += 2; i2 += 2; i1++; i2++;
/* 6 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
i1 += 2; i2 += 2; i1++; i2++;
/* 7 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
/* 8 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
/* 9 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
/* 10 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
/* 11 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
/* 12 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
k = nblock + 8; k = nblock + 8;
do { do {
/* 1 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2]; s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2); if (s1 != s2) return (s1 > s2);
i1 += 2; i2 += 2; i1++; i2++;
/* 2 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2]; s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2); if (s1 != s2) return (s1 > s2);
i1 += 2; i2 += 2; i1++; i2++;
/* 3 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2]; s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2); if (s1 != s2) return (s1 > s2);
i1 += 2; i2 += 2; i1++; i2++;
/* 4 */
s1 = block[i1]; s2 = block[i2]; c1 = block[i1]; c2 = block[i2];
if (s1 != s2) return (s1 > s2); if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2]; s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2); if (s1 != s2) return (s1 > s2);
i1 += 2; i2 += 2; i1++; i2++;
/* 5 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
/* 6 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
/* 7 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
/* 8 */
c1 = block[i1]; c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
if (i1 >= nblock) i1 -= nblock; if (i1 >= nblock) i1 -= nblock;
if (i2 >= nblock) i2 -= nblock; if (i2 >= nblock) i2 -= nblock;
@ -467,13 +523,14 @@ Bool mainGtU ( UInt32 i1,
because the number of elems to sort is because the number of elems to sort is
usually small, typically <= 20. usually small, typically <= 20.
--*/ --*/
static
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
9841, 29524, 88573, 265720, 9841, 29524, 88573, 265720,
797161, 2391484 }; 797161, 2391484 };
static static
void mainSimpleSort ( UInt32* ptr, void mainSimpleSort ( UInt32* ptr,
UInt16* block, UChar* block,
UInt16* quadrant, UInt16* quadrant,
Int32 nblock, Int32 nblock,
Int32 lo, Int32 lo,
@ -568,19 +625,19 @@ void mainSimpleSort ( UInt32* ptr,
} \ } \
} }
static static
__inline__ __inline__
UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c ) UChar mmed3 ( UChar a, UChar b, UChar c )
{ {
UInt16 t; UChar t;
if (a > b) { t = a; a = b; b = t; }; if (a > b) { t = a; a = b; b = t; };
if (b > c) { t = b; b = c; c = t; }; if (b > c) {
b = c;
if (a > b) b = a; if (a > b) b = a;
}
return b; return b;
} }
#define mmin(a,b) ((a) < (b)) ? (a) : (b) #define mmin(a,b) ((a) < (b)) ? (a) : (b)
#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
@ -609,7 +666,7 @@ UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c )
static static
void mainQSort3 ( UInt32* ptr, void mainQSort3 ( UInt32* ptr,
UInt16* block, UChar* block,
UInt16* quadrant, UInt16* quadrant,
Int32 nblock, Int32 nblock,
Int32 loSt, Int32 loSt,
@ -679,7 +736,7 @@ void mainQSort3 ( UInt32* ptr,
AssertD ( unHi == unLo-1, "mainQSort3(2)" ); AssertD ( unHi == unLo-1, "mainQSort3(2)" );
if (gtHi < ltLo) { if (gtHi < ltLo) {
mpush(lo, hi, d+2 ); mpush(lo, hi, d+1 );
continue; continue;
} }
@ -691,7 +748,7 @@ void mainQSort3 ( UInt32* ptr,
nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2; nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
@ -722,11 +779,11 @@ void mainQSort3 ( UInt32* ptr,
/* Pre: /* Pre:
nblock > N_OVERSHOOT nblock > N_OVERSHOOT
block32 exists for [0 .. nblock-1 +N_OVERSHOOT] block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
((UInt16*)block32) [0 .. nblock-1] [15:8] holds block ((UChar*)block32) [0 .. nblock-1] holds block
ptr exists for [0 .. nblock-1] ptr exists for [0 .. nblock-1]
Post: Post:
((UInt16*)block32) [0 .. nblock-1] [15:8] holds block ((UChar*)block32) [0 .. nblock-1] holds block
All other areas of block32 destroyed All other areas of block32 destroyed
ftab [0 .. 65536 ] destroyed ftab [0 .. 65536 ] destroyed
ptr [0 .. nblock-1] holds sorted order ptr [0 .. nblock-1] holds sorted order
@ -739,40 +796,47 @@ void mainQSort3 ( UInt32* ptr,
static static
void mainSort ( UInt32* ptr, void mainSort ( UInt32* ptr,
UInt16* block, UChar* block,
UInt16* quadrant, UInt16* quadrant,
UInt32* ftab, UInt32* ftab,
Int32 nblock, Int32 nblock,
Int32 verb, Int32 verb,
Int32* budget ) Int32* budget )
{ {
Int32 i, j, k, m, ss, sb; Int32 i, j, k, ss, sb;
Int32 runningOrder[256]; Int32 runningOrder[256];
Int32 copy[256];
Bool bigDone[256]; Bool bigDone[256];
Int32 copyStart[256];
Int32 copyEnd [256];
UChar c1; UChar c1;
Int32 numQSorted; Int32 numQSorted;
Int32 biggestSoFar;
UInt16 s; UInt16 s;
if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
/*-- Stripe the block data into 16 bits, and at the /*-- set up the 2-byte frequency table --*/
same time set up the 2-byte frequency table
--*/
for (i = 65536; i >= 0; i--) ftab[i] = 0; for (i = 65536; i >= 0; i--) ftab[i] = 0;
s = block[0]; j = block[0] << 8;
for (i = 1; i < nblock; i++) { i = nblock-1;
for (; i >= 3; i -= 4) {
quadrant[i] = 0; quadrant[i] = 0;
s = (s << 8) | block[i]; j = (j >> 8) | ( ((UInt16)block[i]) << 8);
block[i-1] = s; ftab[j]++;
ftab[s]++; quadrant[i-1] = 0;
j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
ftab[j]++;
quadrant[i-2] = 0;
j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
ftab[j]++;
quadrant[i-3] = 0;
j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
ftab[j]++;
}
for (; i >= 0; i--) {
quadrant[i] = 0;
j = (j >> 8) | ( ((UInt16)block[i]) << 8);
ftab[j]++;
} }
quadrant[0] = 0;
s = (s << 8) | (block[0] >> 8);
block[nblock-1] = s;
ftab[s]++;
/*-- (emphasises close relationship of block & quadrant) --*/ /*-- (emphasises close relationship of block & quadrant) --*/
for (i = 0; i < BZ_N_OVERSHOOT; i++) { for (i = 0; i < BZ_N_OVERSHOOT; i++) {
@ -785,9 +849,29 @@ void mainSort ( UInt32* ptr,
/*-- Complete the initial radix sort --*/ /*-- Complete the initial radix sort --*/
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
for (i = 0; i < nblock; i++) { s = block[0] << 8;
s = block[i]; i = nblock-1;
j = ftab[s] - 1; for (; i >= 3; i -= 4) {
s = (s >> 8) | (block[i] << 8);
j = ftab[s] -1;
ftab[s] = j;
ptr[j] = i;
s = (s >> 8) | (block[i-1] << 8);
j = ftab[s] -1;
ftab[s] = j;
ptr[j] = i-1;
s = (s >> 8) | (block[i-2] << 8);
j = ftab[s] -1;
ftab[s] = j;
ptr[j] = i-2;
s = (s >> 8) | (block[i-3] << 8);
j = ftab[s] -1;
ftab[s] = j;
ptr[j] = i-3;
}
for (; i >= 0; i--) {
s = (s >> 8) | (block[i] << 8);
j = ftab[s] -1;
ftab[s] = j; ftab[s] = j;
ptr[j] = i; ptr[j] = i;
} }
@ -826,13 +910,13 @@ void mainSort ( UInt32* ptr,
The main sorting loop. The main sorting loop.
--*/ --*/
biggestSoFar = numQSorted = 0; numQSorted = 0;
for (i = 0; i <= 255; i++) { for (i = 0; i <= 255; i++) {
/*-- /*--
Process big buckets, starting with the least full. Process big buckets, starting with the least full.
Basically this is a 4-step process in which we call Basically this is a 3-step process in which we call
mainQSort3 to sort the small buckets [ss, j], but mainQSort3 to sort the small buckets [ss, j], but
also make a big effort to avoid the calls if we can. also make a big effort to avoid the calls if we can.
--*/ --*/
@ -869,38 +953,37 @@ void mainSort ( UInt32* ptr,
} }
} }
AssertH ( !bigDone[ss], 1006 );
/*-- /*--
Step 2: Step 2:
Deal specially with case [ss, ss]. This establishes the Now scan this big bucket [ss] so as to synthesise the
sorted order for [ss, ss] without any comparisons. sorted order for small buckets [t, ss] for all t,
A clever trick, cryptically described as steps Q6b and Q6c including, magically, the bucket [ss,ss] too.
in SRC-124 (aka BW94). Compared to bzip2, this makes it This will avoid doing Real Work in subsequent Step 1's.
practical not to use a preliminary run-length coder.
--*/ --*/
{ {
Int32 put0, get0, put1, get1; for (j = 0; j <= 255; j++) {
Int32 sbn = (ss << 8) + ss; copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
Int32 lo = ftab[sbn] & CLEARMASK; copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
UChar ssc = (UChar)ss;
put0 = lo;
get0 = ftab[ss << 8] & CLEARMASK;
put1 = hi;
get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
while (get0 < put0) {
j = ptr[get0]-1; if (j < 0) j += nblock;
c1 = (UChar)(block[j] >> 8);
if (c1 == ssc) { ptr[put0] = j; put0++; };
get0++;
} }
while (get1 > put1) { for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
j = ptr[get1]-1; if (j < 0) j += nblock; k = ptr[j]-1; if (k < 0) k += nblock;
c1 = (UChar)(block[j] >> 8); c1 = block[k];
if (c1 == ssc) { ptr[put1] = j; put1--; }; if (!bigDone[c1])
get1--; ptr[ copyStart[c1]++ ] = k;
} }
ftab[sbn] |= SETMASK; for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
k = ptr[j]-1; if (k < 0) k += nblock;
c1 = block[k];
if (!bigDone[c1])
ptr[ copyEnd[c1]-- ] = k;
} }
}
AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
/*-- /*--
Step 3: Step 3:
@ -950,7 +1033,7 @@ void mainSort ( UInt32* ptr,
while ((bbSize >> shifts) > 65534) shifts++; while ((bbSize >> shifts) > 65534) shifts++;
for (j = 0; j < bbSize; j++) { for (j = bbSize-1; j >= 0; j--) {
Int32 a2update = ptr[bbStart + j]; Int32 a2update = ptr[bbStart + j];
UInt16 qVal = (UInt16)(j >> shifts); UInt16 qVal = (UInt16)(j >> shifts);
quadrant[a2update] = qVal; quadrant[a2update] = qVal;
@ -960,26 +1043,6 @@ void mainSort ( UInt32* ptr,
AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
} }
/*--
Step 4:
Now scan this big bucket [ss] so as to synthesise the
sorted order for small buckets [t, ss] for all t != ss.
This will avoid doing Real Work in subsequent Step 1's.
--*/
for (j = 0; j <= 255; j++)
copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
m = ftab[(ss+1) << 8] & CLEARMASK;
for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) {
k = ptr[j] - 1; if (k < 0) k += nblock;
c1 = (UChar)(block[k] >> 8);
if ( ! bigDone[c1] ) {
ptr[copy[c1]] = k;
copy[c1] ++;
}
}
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
} }
if (verb >= 4) if (verb >= 4)
@ -996,19 +1059,19 @@ void mainSort ( UInt32* ptr,
/* Pre: /* Pre:
nblock > 0 nblock > 0
arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block ((UChar*)arr2) [0 .. nblock-1] holds block
arr1 exists for [0 .. nblock-1] arr1 exists for [0 .. nblock-1]
Post: Post:
((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block ((UChar*)arr2) [0 .. nblock-1] holds block
All other areas of block destroyed All other areas of block destroyed
ftab [ 0 .. 65536 ] destroyed ftab [ 0 .. 65536 ] destroyed
arr1 [0 .. nblock-1] holds sorted order arr1 [0 .. nblock-1] holds sorted order
*/ */
void blockSort ( EState* s ) void BZ2_blockSort ( EState* s )
{ {
UInt32* ptr = s->ptr; UInt32* ptr = s->ptr;
UInt16* block = s->block; UChar* block = s->block;
UInt32* ftab = s->ftab; UInt32* ftab = s->ftab;
Int32 nblock = s->nblock; Int32 nblock = s->nblock;
Int32 verb = s->verbosity; Int32 verb = s->verbosity;
@ -1019,10 +1082,16 @@ void blockSort ( EState* s )
Int32 i; Int32 i;
if (nblock < 10000) { if (nblock < 10000) {
for (i = 0; i < nblock; i++) block[i] <<= 8;
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
} else { } else {
quadrant = &(block[nblock+BZ_N_OVERSHOOT]); /* Calculate the location for quadrant, remembering to get
the alignment right. Assumes that &(block[0]) is at least
2-byte aligned -- this should be ok since block is really
the first section of arr2.
*/
i = nblock+BZ_N_OVERSHOOT;
if (i & 1) i++;
quadrant = (UInt16*)(&(block[i]));
/* (wfact-1) / 3 puts the default-factor-30 /* (wfact-1) / 3 puts the default-factor-30
transition point at very roughly the same place as transition point at very roughly the same place as

View File

@ -1,7 +1,7 @@
.PU .PU
.TH bzip2 1 .TH bzip2 1
.SH NAME .SH NAME
bzip2, bunzip2 \- a block-sorting file compressor, v0.9.5 bzip2, bunzip2 \- a block-sorting file compressor, v1.0
.br .br
bzcat \- decompresses files to stdout bzcat \- decompresses files to stdout
.br .br
@ -397,11 +397,12 @@ I/O error messages are not as helpful as they could be.
tries hard to detect I/O errors and exit cleanly, but the details of tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading. what the problem is sometimes seem rather misleading.
This manual page pertains to version 0.9.5 of This manual page pertains to version 1.0 of
.I bzip2. .I bzip2.
Compressed Compressed
data created by this version is entirely forwards and backwards data created by this version is entirely forwards and backwards
compatible with the previous public releases, versions 0.1pl2 and 0.9.0, compatible with the previous public releases, versions 0.1pl2, 0.9.0
and 0.9.5,
but with the following exception: 0.9.0 and above can correctly but with the following exception: 0.9.0 and above can correctly
decompress multiple concatenated compressed files. 0.1pl2 cannot do decompress multiple concatenated compressed files. 0.1pl2 cannot do
this; it will stop after decompressing just the first file in the this; it will stop after decompressing just the first file in the
@ -415,6 +416,7 @@ megabytes long. This could easily be fixed.
.SH AUTHOR .SH AUTHOR
Julian Seward, jseward@acm.org. Julian Seward, jseward@acm.org.
http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk http://www.muraroa.demon.co.uk
The ideas embodied in The ideas embodied in

View File

@ -1,7 +1,11 @@
bzip2(1) bzip2(1)
NNAAMMEE NNAAMMEE
bzip2, bunzip2 - a block-sorting file compressor, v0.9.5 bzip2, bunzip2 - a block-sorting file compressor, v1.0
bzcat - decompresses files to stdout bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files bzip2recover - recovers data from damaged bzip2 files
@ -54,6 +58,18 @@ DDEESSCCRRIIPPTTIIOONN
filename.bz2 becomes filename filename.bz2 becomes filename
filename.bz becomes filename filename.bz becomes filename
filename.tbz2 becomes filename.tar filename.tbz2 becomes filename.tar
1
bzip2(1) bzip2(1)
filename.tbz becomes filename.tar filename.tbz becomes filename.tar
anyothername becomes anyothername.out anyothername becomes anyothername.out
@ -109,6 +125,17 @@ DDEESSCCRRIIPPTTIIOONN
you recover the original uncompressed data. You can use you recover the original uncompressed data. You can use
_b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files. _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files.
2
bzip2(1) bzip2(1)
Return values: 0 for a normal exit, 1 for environmental Return values: 0 for a normal exit, 1 for environmental
problems (file not found, invalid flags, I/O errors, &c), problems (file not found, invalid flags, I/O errors, &c),
2 to indicate a corrupt compressed file, 3 for an internal 2 to indicate a corrupt compressed file, 3 for an internal
@ -163,6 +190,18 @@ OOPPTTIIOONNSS
--qq ----qquuiieett --qq ----qquuiieett
Suppress non-essential warning messages. Messages Suppress non-essential warning messages. Messages
pertaining to I/O errors and other critical events pertaining to I/O errors and other critical events
3
bzip2(1) bzip2(1)
will not be suppressed. will not be suppressed.
--vv ----vveerrbboossee --vv ----vveerrbboossee
@ -217,6 +256,18 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
Larger block sizes give rapidly diminishing marginal Larger block sizes give rapidly diminishing marginal
returns. Most of the compression comes from the first two returns. Most of the compression comes from the first two
4
bzip2(1) bzip2(1)
or three hundred k of block size, a fact worth bearing in or three hundred k of block size, a fact worth bearing in
mind when using _b_z_i_p_2 on small machines. It is also mind when using _b_z_i_p_2 on small machines. It is also
important to appreciate that the decompression memory important to appreciate that the decompression memory
@ -270,6 +321,19 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
-9 7600k 3700k 2350k 828642 -9 7600k 3700k 2350k 828642
5
bzip2(1) bzip2(1)
RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS
_b_z_i_p_2 compresses files in blocks, usually 900kbytes long. _b_z_i_p_2 compresses files in blocks, usually 900kbytes long.
Each block is handled independently. If a media or trans- Each block is handled independently. If a media or trans-
@ -324,6 +388,18 @@ PPEERRFFOORRMMAANNCCEE NNOOTTEESS
operate in, and then charges all over it in a fairly ran- operate in, and then charges all over it in a fairly ran-
dom fashion. This means that performance, both for com- dom fashion. This means that performance, both for com-
pressing and decompressing, is largely determined by the pressing and decompressing, is largely determined by the
6
bzip2(1) bzip2(1)
speed at which your machine can service cache misses. speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the Because of this, small changes to the code to reduce the
miss rate have been observed to give disproportionately miss rate have been observed to give disproportionately
@ -337,14 +413,14 @@ CCAAVVEEAATTSS
but the details of what the problem is sometimes seem but the details of what the problem is sometimes seem
rather misleading. rather misleading.
This manual page pertains to version 0.9.5 of _b_z_i_p_2_. Com- This manual page pertains to version 1.0 of _b_z_i_p_2_. Com-
pressed data created by this version is entirely forwards pressed data created by this version is entirely forwards
and backwards compatible with the previous public and backwards compatible with the previous public
releases, versions 0.1pl2 and 0.9.0, but with the follow- releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the
ing exception: 0.9.0 and above can correctly decompress following exception: 0.9.0 and above can correctly decom-
multiple concatenated compressed files. 0.1pl2 cannot do press multiple concatenated compressed files. 0.1pl2 can-
this; it will stop after decompressing just the first file not do this; it will stop after decompressing just the
in the stream. first file in the stream.
_b_z_i_p_2_r_e_c_o_v_e_r uses 32-bit integers to represent bit posi- _b_z_i_p_2_r_e_c_o_v_e_r uses 32-bit integers to represent bit posi-
tions in compressed files, so it cannot handle compressed tions in compressed files, so it cannot handle compressed
@ -355,6 +431,7 @@ CCAAVVEEAATTSS
AAUUTTHHOORR AAUUTTHHOORR
Julian Seward, jseward@acm.org. Julian Seward, jseward@acm.org.
http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk http://www.muraroa.demon.co.uk
The ideas embodied in _b_z_i_p_2 are due to (at least) the fol- The ideas embodied in _b_z_i_p_2 are due to (at least) the fol-
@ -373,3 +450,13 @@ AAUUTTHHOORR
with portability problems, lent machines, gave advice and with portability problems, lent machines, gave advice and
were generally helpful. were generally helpful.
7

496
bzip2.c

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
NAME NAME
bzip2, bunzip2 - a block-sorting file compressor, v0.9.5 bzip2, bunzip2 - a block-sorting file compressor, v1.0
bzcat - decompresses files to stdout bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files bzip2recover - recovers data from damaged bzip2 files
@ -337,14 +337,14 @@ CAVEATS
but the details of what the problem is sometimes seem but the details of what the problem is sometimes seem
rather misleading. rather misleading.
This manual page pertains to version 0.9.5 of bzip2. Com- This manual page pertains to version 1.0 of bzip2. Com-
pressed data created by this version is entirely forwards pressed data created by this version is entirely forwards
and backwards compatible with the previous public and backwards compatible with the previous public
releases, versions 0.1pl2 and 0.9.0, but with the follow- releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the
ing exception: 0.9.0 and above can correctly decompress following exception: 0.9.0 and above can correctly decom-
multiple concatenated compressed files. 0.1pl2 cannot do press multiple concatenated compressed files. 0.1pl2 can-
this; it will stop after decompressing just the first file not do this; it will stop after decompressing just the
in the stream. first file in the stream.
bzip2recover uses 32-bit integers to represent bit posi- bzip2recover uses 32-bit integers to represent bit posi-
tions in compressed files, so it cannot handle compressed tions in compressed files, so it cannot handle compressed
@ -355,6 +355,7 @@ CAVEATS
AUTHOR AUTHOR
Julian Seward, jseward@acm.org. Julian Seward, jseward@acm.org.
http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk http://www.muraroa.demon.co.uk
The ideas embodied in bzip2 are due to (at least) the fol- The ideas embodied in bzip2 are due to (at least) the fol-

View File

@ -7,9 +7,9 @@
/*-- /*--
This program is bzip2recover, a program to attempt data This program is bzip2recover, a program to attempt data
salvage from damaged files created by the accompanying salvage from damaged files created by the accompanying
bzip2-0.9.5 program. bzip2-1.0 program.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -44,7 +44,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
--*/ --*/
/*-- /*--
@ -282,7 +282,7 @@ Int32 main ( Int32 argc, Char** argv )
strcpy ( progName, argv[0] ); strcpy ( progName, argv[0] );
inFileName[0] = outFileName[0] = 0; inFileName[0] = outFileName[0] = 0;
fprintf ( stderr, "bzip2recover 0.9.5d: extracts blocks from damaged .bz2 files.\n" ); fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" );
if (argc != 2) { if (argc != 2) {
fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",

240
bzlib.c
View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -83,24 +83,36 @@
/*---------------------------------------------------*/ /*---------------------------------------------------*/
#ifndef BZ_NO_STDIO #ifndef BZ_NO_STDIO
void bz__AssertH__fail ( int errcode ) void BZ2_bz__AssertH__fail ( int errcode )
{ {
fprintf(stderr, fprintf(stderr,
"\n\nbzip2/libbzip2, v0.9.5d: internal error number %d.\n" "\n\nbzip2/libbzip2: internal error number %d.\n"
"This is a bug in bzip2/libbzip2, v0.9.5d. Please report\n" "This is a bug in bzip2/libbzip2, %s.\n"
"it to me at: jseward@acm.org. If this happened when\n" "Please report it to me at: jseward@acm.org. If this happened\n"
"you were using some program which uses libbzip2 as a\n" "when you were using some program which uses libbzip2 as a\n"
"component, you should also report this bug to the author(s)\n" "component, you should also report this bug to the author(s)\n"
"of that program. Please make an effort to report this bug;\n" "of that program. Please make an effort to report this bug;\n"
"timely and accurate bug reports eventually lead to higher\n" "timely and accurate bug reports eventually lead to higher\n"
"quality software. Thanks. Julian Seward, 4 Sept 1999.\n\n", "quality software. Thanks. Julian Seward, 21 March 2000.\n\n",
errcode errcode,
BZ2_bzlibVersion()
); );
exit(3); exit(3);
} }
#endif #endif
/*---------------------------------------------------*/
/*--
   Sanity-check the sizes of the basic C types on this platform.
   Returns 1 when int is 4 bytes, short is 2 bytes and char is
   1 byte; returns 0 as soon as any of those does not hold.
   The compress/decompress init routines refuse to start
   (BZ_CONFIG_ERROR) when this yields 0.
   NOTE(review): presumably these sizes back the library's
   Int32/UInt16/UChar typedefs -- confirm against the header.
--*/
static
int bz_config_ok ( void )
{
   const int int_is_32bit   = (sizeof(int)   == 4);
   const int short_is_16bit = (sizeof(short) == 2);
   const int char_is_8bit   = (sizeof(char)  == 1);

   return (int_is_32bit && short_is_16bit && char_is_8bit) ? 1 : 0;
}
/*---------------------------------------------------*/ /*---------------------------------------------------*/
static static
void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
@ -149,7 +161,7 @@ Bool isempty_RL ( EState* s )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzCompressInit) int BZ_API(BZ2_bzCompressInit)
( bz_stream* strm, ( bz_stream* strm,
int blockSize100k, int blockSize100k,
int verbosity, int verbosity,
@ -158,6 +170,8 @@ int BZ_API(bzCompressInit)
Int32 n; Int32 n;
EState* s; EState* s;
if (!bz_config_ok()) return BZ_CONFIG_ERROR;
if (strm == NULL || if (strm == NULL ||
blockSize100k < 1 || blockSize100k > 9 || blockSize100k < 1 || blockSize100k > 9 ||
workFactor < 0 || workFactor > 250) workFactor < 0 || workFactor > 250)
@ -197,14 +211,16 @@ int BZ_API(bzCompressInit)
s->verbosity = verbosity; s->verbosity = verbosity;
s->workFactor = workFactor; s->workFactor = workFactor;
s->block = (UInt16*)s->arr2; s->block = (UChar*)s->arr2;
s->mtfv = (UInt16*)s->arr1; s->mtfv = (UInt16*)s->arr1;
s->zbits = NULL; s->zbits = NULL;
s->ptr = (UInt32*)s->arr1; s->ptr = (UInt32*)s->arr1;
strm->state = s; strm->state = s;
strm->total_in = 0; strm->total_in_lo32 = 0;
strm->total_out = 0; strm->total_in_hi32 = 0;
strm->total_out_lo32 = 0;
strm->total_out_hi32 = 0;
init_RL ( s ); init_RL ( s );
prepare_new_block ( s ); prepare_new_block ( s );
return BZ_OK; return BZ_OK;
@ -223,24 +239,24 @@ void add_pair_to_block ( EState* s )
s->inUse[s->state_in_ch] = True; s->inUse[s->state_in_ch] = True;
switch (s->state_in_len) { switch (s->state_in_len) {
case 1: case 1:
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
break; break;
case 2: case 2:
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
break; break;
case 3: case 3:
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
break; break;
default: default:
s->inUse[s->state_in_len-4] = True; s->inUse[s->state_in_len-4] = True;
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
s->block[s->nblock] = (UInt16)ch; s->nblock++; s->block[s->nblock] = (UChar)ch; s->nblock++;
s->block[s->nblock] = ((UInt16)(s->state_in_len-4)); s->block[s->nblock] = ((UChar)(s->state_in_len-4));
s->nblock++; s->nblock++;
break; break;
} }
@ -266,7 +282,7 @@ void flush_RL ( EState* s )
UChar ch = (UChar)(zs->state_in_ch); \ UChar ch = (UChar)(zs->state_in_ch); \
BZ_UPDATE_CRC( zs->blockCRC, ch ); \ BZ_UPDATE_CRC( zs->blockCRC, ch ); \
zs->inUse[zs->state_in_ch] = True; \ zs->inUse[zs->state_in_ch] = True; \
zs->block[zs->nblock] = (UInt16)ch; \ zs->block[zs->nblock] = (UChar)ch; \
zs->nblock++; \ zs->nblock++; \
zs->state_in_ch = zchh; \ zs->state_in_ch = zchh; \
} \ } \
@ -302,7 +318,8 @@ Bool copy_input_until_stop ( EState* s )
ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
s->strm->next_in++; s->strm->next_in++;
s->strm->avail_in--; s->strm->avail_in--;
s->strm->total_in++; s->strm->total_in_lo32++;
if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
} }
} else { } else {
@ -319,7 +336,8 @@ Bool copy_input_until_stop ( EState* s )
ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
s->strm->next_in++; s->strm->next_in++;
s->strm->avail_in--; s->strm->avail_in--;
s->strm->total_in++; s->strm->total_in_lo32++;
if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
s->avail_in_expect--; s->avail_in_expect--;
} }
} }
@ -346,8 +364,8 @@ Bool copy_output_until_stop ( EState* s )
s->state_out_pos++; s->state_out_pos++;
s->strm->avail_out--; s->strm->avail_out--;
s->strm->next_out++; s->strm->next_out++;
s->strm->total_out++; s->strm->total_out_lo32++;
if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
} }
return progress_out; return progress_out;
@ -381,12 +399,12 @@ Bool handle_compress ( bz_stream* strm )
progress_in |= copy_input_until_stop ( s ); progress_in |= copy_input_until_stop ( s );
if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
flush_RL ( s ); flush_RL ( s );
compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
s->state = BZ_S_OUTPUT; s->state = BZ_S_OUTPUT;
} }
else else
if (s->nblock >= s->nblockMAX) { if (s->nblock >= s->nblockMAX) {
compressBlock ( s, False ); BZ2_compressBlock ( s, False );
s->state = BZ_S_OUTPUT; s->state = BZ_S_OUTPUT;
} }
else else
@ -402,7 +420,7 @@ Bool handle_compress ( bz_stream* strm )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzCompress) ( bz_stream *strm, int action ) int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
{ {
Bool progress; Bool progress;
EState* s; EState* s;
@ -439,7 +457,8 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action )
case BZ_M_FLUSHING: case BZ_M_FLUSHING:
if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR; if (s->avail_in_expect != s->strm->avail_in)
return BZ_SEQUENCE_ERROR;
progress = handle_compress ( strm ); progress = handle_compress ( strm );
if (s->avail_in_expect > 0 || !isempty_RL(s) || if (s->avail_in_expect > 0 || !isempty_RL(s) ||
s->state_out_pos < s->numZ) return BZ_FLUSH_OK; s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
@ -448,7 +467,8 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action )
case BZ_M_FINISHING: case BZ_M_FINISHING:
if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR; if (s->avail_in_expect != s->strm->avail_in)
return BZ_SEQUENCE_ERROR;
progress = handle_compress ( strm ); progress = handle_compress ( strm );
if (!progress) return BZ_SEQUENCE_ERROR; if (!progress) return BZ_SEQUENCE_ERROR;
if (s->avail_in_expect > 0 || !isempty_RL(s) || if (s->avail_in_expect > 0 || !isempty_RL(s) ||
@ -461,7 +481,7 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzCompressEnd) ( bz_stream *strm ) int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
{ {
EState* s; EState* s;
if (strm == NULL) return BZ_PARAM_ERROR; if (strm == NULL) return BZ_PARAM_ERROR;
@ -485,13 +505,15 @@ int BZ_API(bzCompressEnd) ( bz_stream *strm )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzDecompressInit) int BZ_API(BZ2_bzDecompressInit)
( bz_stream* strm, ( bz_stream* strm,
int verbosity, int verbosity,
int small ) int small )
{ {
DState* s; DState* s;
if (!bz_config_ok()) return BZ_CONFIG_ERROR;
if (strm == NULL) return BZ_PARAM_ERROR; if (strm == NULL) return BZ_PARAM_ERROR;
if (small != 0 && small != 1) return BZ_PARAM_ERROR; if (small != 0 && small != 1) return BZ_PARAM_ERROR;
if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR; if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
@ -507,8 +529,10 @@ int BZ_API(bzDecompressInit)
s->bsLive = 0; s->bsLive = 0;
s->bsBuff = 0; s->bsBuff = 0;
s->calculatedCombinedCRC = 0; s->calculatedCombinedCRC = 0;
strm->total_in = 0; strm->total_in_lo32 = 0;
strm->total_out = 0; strm->total_in_hi32 = 0;
strm->total_out_lo32 = 0;
strm->total_out_hi32 = 0;
s->smallDecompress = (Bool)small; s->smallDecompress = (Bool)small;
s->ll4 = NULL; s->ll4 = NULL;
s->ll16 = NULL; s->ll16 = NULL;
@ -538,7 +562,8 @@ void unRLE_obuf_to_output_FAST ( DState* s )
s->state_out_len--; s->state_out_len--;
s->strm->next_out++; s->strm->next_out++;
s->strm->avail_out--; s->strm->avail_out--;
s->strm->total_out++; s->strm->total_out_lo32++;
if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
} }
/* can a new run be started? */ /* can a new run be started? */
@ -587,6 +612,7 @@ void unRLE_obuf_to_output_FAST ( DState* s )
UInt32 avail_out_INIT = cs_avail_out; UInt32 avail_out_INIT = cs_avail_out;
Int32 s_save_nblockPP = s->save_nblock+1; Int32 s_save_nblockPP = s->save_nblock+1;
unsigned int total_out_lo32_old;
while (True) { while (True) {
@ -640,7 +666,10 @@ void unRLE_obuf_to_output_FAST ( DState* s )
} }
return_notr: return_notr:
s->strm->total_out += (avail_out_INIT - cs_avail_out); total_out_lo32_old = s->strm->total_out_lo32;
s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
if (s->strm->total_out_lo32 < total_out_lo32_old)
s->strm->total_out_hi32++;
/* save */ /* save */
s->calculatedBlockCRC = c_calculatedBlockCRC; s->calculatedBlockCRC = c_calculatedBlockCRC;
@ -659,7 +688,7 @@ void unRLE_obuf_to_output_FAST ( DState* s )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
__inline__ Int32 indexIntoF ( Int32 indx, Int32 *cftab ) __inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
{ {
Int32 nb, na, mid; Int32 nb, na, mid;
nb = 0; nb = 0;
@ -691,7 +720,8 @@ void unRLE_obuf_to_output_SMALL ( DState* s )
s->state_out_len--; s->state_out_len--;
s->strm->next_out++; s->strm->next_out++;
s->strm->avail_out--; s->strm->avail_out--;
s->strm->total_out++; s->strm->total_out_lo32++;
if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
} }
/* can a new run be started? */ /* can a new run be started? */
@ -736,7 +766,8 @@ void unRLE_obuf_to_output_SMALL ( DState* s )
s->state_out_len--; s->state_out_len--;
s->strm->next_out++; s->strm->next_out++;
s->strm->avail_out--; s->strm->avail_out--;
s->strm->total_out++; s->strm->total_out_lo32++;
if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
} }
/* can a new run be started? */ /* can a new run be started? */
@ -768,7 +799,7 @@ void unRLE_obuf_to_output_SMALL ( DState* s )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzDecompress) ( bz_stream *strm ) int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
{ {
DState* s; DState* s;
if (strm == NULL) return BZ_PARAM_ERROR; if (strm == NULL) return BZ_PARAM_ERROR;
@ -800,7 +831,7 @@ int BZ_API(bzDecompress) ( bz_stream *strm )
} }
} }
if (s->state >= BZ_X_MAGIC_1) { if (s->state >= BZ_X_MAGIC_1) {
Int32 r = decompress ( s ); Int32 r = BZ2_decompress ( s );
if (r == BZ_STREAM_END) { if (r == BZ_STREAM_END) {
if (s->verbosity >= 3) if (s->verbosity >= 3)
VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x", VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x",
@ -820,7 +851,7 @@ int BZ_API(bzDecompress) ( bz_stream *strm )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzDecompressEnd) ( bz_stream *strm ) int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm )
{ {
DState* s; DState* s;
if (strm == NULL) return BZ_PARAM_ERROR; if (strm == NULL) return BZ_PARAM_ERROR;
@ -874,7 +905,7 @@ static Bool myfeof ( FILE* f )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
BZFILE* BZ_API(bzWriteOpen) BZFILE* BZ_API(BZ2_bzWriteOpen)
( int* bzerror, ( int* bzerror,
FILE* f, FILE* f,
int blockSize100k, int blockSize100k,
@ -909,7 +940,7 @@ BZFILE* BZ_API(bzWriteOpen)
bzf->strm.opaque = NULL; bzf->strm.opaque = NULL;
if (workFactor == 0) workFactor = 30; if (workFactor == 0) workFactor = 30;
ret = bzCompressInit ( &(bzf->strm), blockSize100k, ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k,
verbosity, workFactor ); verbosity, workFactor );
if (ret != BZ_OK) if (ret != BZ_OK)
{ BZ_SETERR(ret); free(bzf); return NULL; }; { BZ_SETERR(ret); free(bzf); return NULL; };
@ -922,7 +953,7 @@ BZFILE* BZ_API(bzWriteOpen)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void BZ_API(bzWrite) void BZ_API(BZ2_bzWrite)
( int* bzerror, ( int* bzerror,
BZFILE* b, BZFILE* b,
void* buf, void* buf,
@ -948,7 +979,7 @@ void BZ_API(bzWrite)
while (True) { while (True) {
bzf->strm.avail_out = BZ_MAX_UNUSED; bzf->strm.avail_out = BZ_MAX_UNUSED;
bzf->strm.next_out = bzf->buf; bzf->strm.next_out = bzf->buf;
ret = bzCompress ( &(bzf->strm), BZ_RUN ); ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
if (ret != BZ_RUN_OK) if (ret != BZ_RUN_OK)
{ BZ_SETERR(ret); return; }; { BZ_SETERR(ret); return; };
@ -967,12 +998,26 @@ void BZ_API(bzWrite)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void BZ_API(bzWriteClose) void BZ_API(BZ2_bzWriteClose)
( int* bzerror, ( int* bzerror,
BZFILE* b, BZFILE* b,
int abandon, int abandon,
unsigned int* nbytes_in, unsigned int* nbytes_in,
unsigned int* nbytes_out ) unsigned int* nbytes_out )
{
BZ2_bzWriteClose64 ( bzerror, b, abandon,
nbytes_in, NULL, nbytes_out, NULL );
}
void BZ_API(BZ2_bzWriteClose64)
( int* bzerror,
BZFILE* b,
int abandon,
unsigned int* nbytes_in_lo32,
unsigned int* nbytes_in_hi32,
unsigned int* nbytes_out_lo32,
unsigned int* nbytes_out_hi32 )
{ {
Int32 n, n2, ret; Int32 n, n2, ret;
bzFile* bzf = (bzFile*)b; bzFile* bzf = (bzFile*)b;
@ -984,14 +1029,16 @@ void BZ_API(bzWriteClose)
if (ferror(bzf->handle)) if (ferror(bzf->handle))
{ BZ_SETERR(BZ_IO_ERROR); return; }; { BZ_SETERR(BZ_IO_ERROR); return; };
if (nbytes_in != NULL) *nbytes_in = 0; if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
if (nbytes_out != NULL) *nbytes_out = 0; if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
if ((!abandon) && bzf->lastErr == BZ_OK) { if ((!abandon) && bzf->lastErr == BZ_OK) {
while (True) { while (True) {
bzf->strm.avail_out = BZ_MAX_UNUSED; bzf->strm.avail_out = BZ_MAX_UNUSED;
bzf->strm.next_out = bzf->buf; bzf->strm.next_out = bzf->buf;
ret = bzCompress ( &(bzf->strm), BZ_FINISH ); ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
{ BZ_SETERR(ret); return; }; { BZ_SETERR(ret); return; };
@ -1013,17 +1060,23 @@ void BZ_API(bzWriteClose)
{ BZ_SETERR(BZ_IO_ERROR); return; }; { BZ_SETERR(BZ_IO_ERROR); return; };
} }
if (nbytes_in != NULL) *nbytes_in = bzf->strm.total_in; if (nbytes_in_lo32 != NULL)
if (nbytes_out != NULL) *nbytes_out = bzf->strm.total_out; *nbytes_in_lo32 = bzf->strm.total_in_lo32;
if (nbytes_in_hi32 != NULL)
*nbytes_in_hi32 = bzf->strm.total_in_hi32;
if (nbytes_out_lo32 != NULL)
*nbytes_out_lo32 = bzf->strm.total_out_lo32;
if (nbytes_out_hi32 != NULL)
*nbytes_out_hi32 = bzf->strm.total_out_hi32;
BZ_SETERR(BZ_OK); BZ_SETERR(BZ_OK);
bzCompressEnd ( &(bzf->strm) ); BZ2_bzCompressEnd ( &(bzf->strm) );
free ( bzf ); free ( bzf );
} }
/*---------------------------------------------------*/ /*---------------------------------------------------*/
BZFILE* BZ_API(bzReadOpen) BZFILE* BZ_API(BZ2_bzReadOpen)
( int* bzerror, ( int* bzerror,
FILE* f, FILE* f,
int verbosity, int verbosity,
@ -1066,7 +1119,7 @@ BZFILE* BZ_API(bzReadOpen)
nUnused--; nUnused--;
} }
ret = bzDecompressInit ( &(bzf->strm), verbosity, small ); ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
if (ret != BZ_OK) if (ret != BZ_OK)
{ BZ_SETERR(ret); free(bzf); return NULL; }; { BZ_SETERR(ret); free(bzf); return NULL; };
@ -1079,7 +1132,7 @@ BZFILE* BZ_API(bzReadOpen)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void BZ_API(bzReadClose) ( int *bzerror, BZFILE *b ) void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
{ {
bzFile* bzf = (bzFile*)b; bzFile* bzf = (bzFile*)b;
@ -1091,13 +1144,13 @@ void BZ_API(bzReadClose) ( int *bzerror, BZFILE *b )
{ BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
if (bzf->initialisedOk) if (bzf->initialisedOk)
(void)bzDecompressEnd ( &(bzf->strm) ); (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
free ( bzf ); free ( bzf );
} }
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzRead) int BZ_API(BZ2_bzRead)
( int* bzerror, ( int* bzerror,
BZFILE* b, BZFILE* b,
void* buf, void* buf,
@ -1135,7 +1188,7 @@ int BZ_API(bzRead)
bzf->strm.next_in = bzf->buf; bzf->strm.next_in = bzf->buf;
} }
ret = bzDecompress ( &(bzf->strm) ); ret = BZ2_bzDecompress ( &(bzf->strm) );
if (ret != BZ_OK && ret != BZ_STREAM_END) if (ret != BZ_OK && ret != BZ_STREAM_END)
{ BZ_SETERR(ret); return 0; }; { BZ_SETERR(ret); return 0; };
@ -1157,7 +1210,7 @@ int BZ_API(bzRead)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void BZ_API(bzReadGetUnused) void BZ_API(BZ2_bzReadGetUnused)
( int* bzerror, ( int* bzerror,
BZFILE* b, BZFILE* b,
void** unused, void** unused,
@ -1183,7 +1236,7 @@ void BZ_API(bzReadGetUnused)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzBuffToBuffCompress) int BZ_API(BZ2_bzBuffToBuffCompress)
( char* dest, ( char* dest,
unsigned int* destLen, unsigned int* destLen,
char* source, char* source,
@ -1206,7 +1259,7 @@ int BZ_API(bzBuffToBuffCompress)
strm.bzalloc = NULL; strm.bzalloc = NULL;
strm.bzfree = NULL; strm.bzfree = NULL;
strm.opaque = NULL; strm.opaque = NULL;
ret = bzCompressInit ( &strm, blockSize100k, ret = BZ2_bzCompressInit ( &strm, blockSize100k,
verbosity, workFactor ); verbosity, workFactor );
if (ret != BZ_OK) return ret; if (ret != BZ_OK) return ret;
@ -1215,27 +1268,27 @@ int BZ_API(bzBuffToBuffCompress)
strm.avail_in = sourceLen; strm.avail_in = sourceLen;
strm.avail_out = *destLen; strm.avail_out = *destLen;
ret = bzCompress ( &strm, BZ_FINISH ); ret = BZ2_bzCompress ( &strm, BZ_FINISH );
if (ret == BZ_FINISH_OK) goto output_overflow; if (ret == BZ_FINISH_OK) goto output_overflow;
if (ret != BZ_STREAM_END) goto errhandler; if (ret != BZ_STREAM_END) goto errhandler;
/* normal termination */ /* normal termination */
*destLen -= strm.avail_out; *destLen -= strm.avail_out;
bzCompressEnd ( &strm ); BZ2_bzCompressEnd ( &strm );
return BZ_OK; return BZ_OK;
output_overflow: output_overflow:
bzCompressEnd ( &strm ); BZ2_bzCompressEnd ( &strm );
return BZ_OUTBUFF_FULL; return BZ_OUTBUFF_FULL;
errhandler: errhandler:
bzCompressEnd ( &strm ); BZ2_bzCompressEnd ( &strm );
return ret; return ret;
} }
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzBuffToBuffDecompress) int BZ_API(BZ2_bzBuffToBuffDecompress)
( char* dest, ( char* dest,
unsigned int* destLen, unsigned int* destLen,
char* source, char* source,
@ -1255,7 +1308,7 @@ int BZ_API(bzBuffToBuffDecompress)
strm.bzalloc = NULL; strm.bzalloc = NULL;
strm.bzfree = NULL; strm.bzfree = NULL;
strm.opaque = NULL; strm.opaque = NULL;
ret = bzDecompressInit ( &strm, verbosity, small ); ret = BZ2_bzDecompressInit ( &strm, verbosity, small );
if (ret != BZ_OK) return ret; if (ret != BZ_OK) return ret;
strm.next_in = source; strm.next_in = source;
@ -1263,26 +1316,26 @@ int BZ_API(bzBuffToBuffDecompress)
strm.avail_in = sourceLen; strm.avail_in = sourceLen;
strm.avail_out = *destLen; strm.avail_out = *destLen;
ret = bzDecompress ( &strm ); ret = BZ2_bzDecompress ( &strm );
if (ret == BZ_OK) goto output_overflow_or_eof; if (ret == BZ_OK) goto output_overflow_or_eof;
if (ret != BZ_STREAM_END) goto errhandler; if (ret != BZ_STREAM_END) goto errhandler;
/* normal termination */ /* normal termination */
*destLen -= strm.avail_out; *destLen -= strm.avail_out;
bzDecompressEnd ( &strm ); BZ2_bzDecompressEnd ( &strm );
return BZ_OK; return BZ_OK;
output_overflow_or_eof: output_overflow_or_eof:
if (strm.avail_out > 0) { if (strm.avail_out > 0) {
bzDecompressEnd ( &strm ); BZ2_bzDecompressEnd ( &strm );
return BZ_UNEXPECTED_EOF; return BZ_UNEXPECTED_EOF;
} else { } else {
bzDecompressEnd ( &strm ); BZ2_bzDecompressEnd ( &strm );
return BZ_OUTBUFF_FULL; return BZ_OUTBUFF_FULL;
}; };
errhandler: errhandler:
bzDecompressEnd ( &strm ); BZ2_bzDecompressEnd ( &strm );
return ret; return ret;
} }
@ -1303,7 +1356,7 @@ int BZ_API(bzBuffToBuffDecompress)
/*-- /*--
return version like "0.9.0c". return version like "0.9.0c".
--*/ --*/
const char * BZ_API(bzlibVersion)(void) const char * BZ_API(BZ2_bzlibVersion)(void)
{ {
return BZ_VERSION; return BZ_VERSION;
} }
@ -1377,9 +1430,11 @@ BZFILE * bzopen_or_bzdopen
/* Guard against total chaos and anarchy -- JRS */ /* Guard against total chaos and anarchy -- JRS */
if (blockSize100k < 1) blockSize100k = 1; if (blockSize100k < 1) blockSize100k = 1;
if (blockSize100k > 9) blockSize100k = 9; if (blockSize100k > 9) blockSize100k = 9;
bzfp = bzWriteOpen(&bzerr,fp,blockSize100k,verbosity,workFactor); bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
verbosity,workFactor);
} else { } else {
bzfp = bzReadOpen(&bzerr,fp,verbosity,smallMode,unused,nUnused); bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
unused,nUnused);
} }
if (bzfp == NULL) { if (bzfp == NULL) {
if (fp != stdin && fp != stdout) fclose(fp); if (fp != stdin && fp != stdout) fclose(fp);
@ -1395,7 +1450,7 @@ BZFILE * bzopen_or_bzdopen
ex) bzopen("file","w9") ex) bzopen("file","w9")
case path="" or NULL => use stdin or stdout. case path="" or NULL => use stdin or stdout.
--*/ --*/
BZFILE * BZ_API(bzopen) BZFILE * BZ_API(BZ2_bzopen)
( const char *path, ( const char *path,
const char *mode ) const char *mode )
{ {
@ -1404,7 +1459,7 @@ BZFILE * BZ_API(bzopen)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
BZFILE * BZ_API(bzdopen) BZFILE * BZ_API(BZ2_bzdopen)
( int fd, ( int fd,
const char *mode ) const char *mode )
{ {
@ -1413,11 +1468,11 @@ BZFILE * BZ_API(bzdopen)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzread) (BZFILE* b, void* buf, int len ) int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
{ {
int bzerr, nread; int bzerr, nread;
if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0; if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
nread = bzRead(&bzerr,b,buf,len); nread = BZ2_bzRead(&bzerr,b,buf,len);
if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) { if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
return nread; return nread;
} else { } else {
@ -1427,11 +1482,11 @@ int BZ_API(bzread) (BZFILE* b, void* buf, int len )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzwrite) (BZFILE* b, void* buf, int len ) int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
{ {
int bzerr; int bzerr;
bzWrite(&bzerr,b,buf,len); BZ2_bzWrite(&bzerr,b,buf,len);
if(bzerr == BZ_OK){ if(bzerr == BZ_OK){
return len; return len;
}else{ }else{
@ -1441,7 +1496,7 @@ int BZ_API(bzwrite) (BZFILE* b, void* buf, int len )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
int BZ_API(bzflush) (BZFILE *b) int BZ_API(BZ2_bzflush) (BZFILE *b)
{ {
/* do nothing now... */ /* do nothing now... */
return 0; return 0;
@ -1449,19 +1504,19 @@ int BZ_API(bzflush) (BZFILE *b)
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void BZ_API(bzclose) (BZFILE* b) void BZ_API(BZ2_bzclose) (BZFILE* b)
{ {
int bzerr; int bzerr;
FILE *fp = ((bzFile *)b)->handle; FILE *fp = ((bzFile *)b)->handle;
if (b==NULL) {return;} if (b==NULL) {return;}
if(((bzFile*)b)->writing){ if(((bzFile*)b)->writing){
bzWriteClose(&bzerr,b,0,NULL,NULL); BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL);
if(bzerr != BZ_OK){ if(bzerr != BZ_OK){
bzWriteClose(NULL,b,1,NULL,NULL); BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
} }
}else{ }else{
bzReadClose(&bzerr,b); BZ2_bzReadClose(&bzerr,b);
} }
if(fp!=stdin && fp!=stdout){ if(fp!=stdin && fp!=stdout){
fclose(fp); fclose(fp);
@ -1483,6 +1538,7 @@ static char *bzerrorstrings[] = {
,"IO_ERROR" ,"IO_ERROR"
,"UNEXPECTED_EOF" ,"UNEXPECTED_EOF"
,"OUTBUFF_FULL" ,"OUTBUFF_FULL"
,"CONFIG_ERROR"
,"???" /* for future */ ,"???" /* for future */
,"???" /* for future */ ,"???" /* for future */
,"???" /* for future */ ,"???" /* for future */
@ -1492,7 +1548,7 @@ static char *bzerrorstrings[] = {
}; };
const char * BZ_API(bzerror) (BZFILE *b, int *errnum) const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
{ {
int err = ((bzFile *)b)->lastErr; int err = ((bzFile *)b)->lastErr;

67
bzlib.h
View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -83,16 +83,19 @@ extern "C" {
#define BZ_IO_ERROR (-6) #define BZ_IO_ERROR (-6)
#define BZ_UNEXPECTED_EOF (-7) #define BZ_UNEXPECTED_EOF (-7)
#define BZ_OUTBUFF_FULL (-8) #define BZ_OUTBUFF_FULL (-8)
#define BZ_CONFIG_ERROR (-9)
typedef typedef
struct { struct {
char *next_in; char *next_in;
unsigned int avail_in; unsigned int avail_in;
unsigned int total_in; unsigned int total_in_lo32;
unsigned int total_in_hi32;
char *next_out; char *next_out;
unsigned int avail_out; unsigned int avail_out;
unsigned int total_out; unsigned int total_out_lo32;
unsigned int total_out_hi32;
void *state; void *state;
@ -130,33 +133,33 @@ typedef
/*-- Core (low-level) library functions --*/ /*-- Core (low-level) library functions --*/
BZ_EXTERN int BZ_API(bzCompressInit) ( BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
bz_stream* strm, bz_stream* strm,
int blockSize100k, int blockSize100k,
int verbosity, int verbosity,
int workFactor int workFactor
); );
BZ_EXTERN int BZ_API(bzCompress) ( BZ_EXTERN int BZ_API(BZ2_bzCompress) (
bz_stream* strm, bz_stream* strm,
int action int action
); );
BZ_EXTERN int BZ_API(bzCompressEnd) ( BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
bz_stream* strm bz_stream* strm
); );
BZ_EXTERN int BZ_API(bzDecompressInit) ( BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
bz_stream *strm, bz_stream *strm,
int verbosity, int verbosity,
int small int small
); );
BZ_EXTERN int BZ_API(bzDecompress) ( BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
bz_stream* strm bz_stream* strm
); );
BZ_EXTERN int BZ_API(bzDecompressEnd) ( BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
bz_stream *strm bz_stream *strm
); );
@ -169,7 +172,7 @@ BZ_EXTERN int BZ_API(bzDecompressEnd) (
typedef void BZFILE; typedef void BZFILE;
BZ_EXTERN BZFILE* BZ_API(bzReadOpen) ( BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
int* bzerror, int* bzerror,
FILE* f, FILE* f,
int verbosity, int verbosity,
@ -178,26 +181,26 @@ BZ_EXTERN BZFILE* BZ_API(bzReadOpen) (
int nUnused int nUnused
); );
BZ_EXTERN void BZ_API(bzReadClose) ( BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
int* bzerror, int* bzerror,
BZFILE* b BZFILE* b
); );
BZ_EXTERN void BZ_API(bzReadGetUnused) ( BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
int* bzerror, int* bzerror,
BZFILE* b, BZFILE* b,
void** unused, void** unused,
int* nUnused int* nUnused
); );
BZ_EXTERN int BZ_API(bzRead) ( BZ_EXTERN int BZ_API(BZ2_bzRead) (
int* bzerror, int* bzerror,
BZFILE* b, BZFILE* b,
void* buf, void* buf,
int len int len
); );
BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) ( BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
int* bzerror, int* bzerror,
FILE* f, FILE* f,
int blockSize100k, int blockSize100k,
@ -205,26 +208,36 @@ BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) (
int workFactor int workFactor
); );
BZ_EXTERN void BZ_API(bzWrite) ( BZ_EXTERN void BZ_API(BZ2_bzWrite) (
int* bzerror, int* bzerror,
BZFILE* b, BZFILE* b,
void* buf, void* buf,
int len int len
); );
BZ_EXTERN void BZ_API(bzWriteClose) ( BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
int* bzerror, int* bzerror,
BZFILE* b, BZFILE* b,
int abandon, int abandon,
unsigned int* nbytes_in, unsigned int* nbytes_in,
unsigned int* nbytes_out unsigned int* nbytes_out
); );
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
int* bzerror,
BZFILE* b,
int abandon,
unsigned int* nbytes_in_lo32,
unsigned int* nbytes_in_hi32,
unsigned int* nbytes_out_lo32,
unsigned int* nbytes_out_hi32
);
#endif #endif
/*-- Utility functions --*/ /*-- Utility functions --*/
BZ_EXTERN int BZ_API(bzBuffToBuffCompress) ( BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
char* dest, char* dest,
unsigned int* destLen, unsigned int* destLen,
char* source, char* source,
@ -234,7 +247,7 @@ BZ_EXTERN int BZ_API(bzBuffToBuffCompress) (
int workFactor int workFactor
); );
BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) ( BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
char* dest, char* dest,
unsigned int* destLen, unsigned int* destLen,
char* source, char* source,
@ -254,42 +267,42 @@ BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) (
If this code breaks, please contact both Yoshioka and me. If this code breaks, please contact both Yoshioka and me.
--*/ --*/
BZ_EXTERN const char * BZ_API(bzlibVersion) ( BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
void void
); );
#ifndef BZ_NO_STDIO #ifndef BZ_NO_STDIO
BZ_EXTERN BZFILE * BZ_API(bzopen) ( BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
const char *path, const char *path,
const char *mode const char *mode
); );
BZ_EXTERN BZFILE * BZ_API(bzdopen) ( BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
int fd, int fd,
const char *mode const char *mode
); );
BZ_EXTERN int BZ_API(bzread) ( BZ_EXTERN int BZ_API(BZ2_bzread) (
BZFILE* b, BZFILE* b,
void* buf, void* buf,
int len int len
); );
BZ_EXTERN int BZ_API(bzwrite) ( BZ_EXTERN int BZ_API(BZ2_bzwrite) (
BZFILE* b, BZFILE* b,
void* buf, void* buf,
int len int len
); );
BZ_EXTERN int BZ_API(bzflush) ( BZ_EXTERN int BZ_API(BZ2_bzflush) (
BZFILE* b BZFILE* b
); );
BZ_EXTERN void BZ_API(bzclose) ( BZ_EXTERN void BZ_API(BZ2_bzclose) (
BZFILE* b BZFILE* b
); );
BZ_EXTERN const char * BZ_API(bzerror) ( BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
BZFILE *b, BZFILE *b,
int *errnum int *errnum
); );

View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -76,7 +76,7 @@
/*-- General stuff. --*/ /*-- General stuff. --*/
#define BZ_VERSION "0.9.5d" #define BZ_VERSION "1.0.1, 23-June-2000"
typedef char Char; typedef char Char;
typedef unsigned char Bool; typedef unsigned char Bool;
@ -94,9 +94,9 @@ typedef unsigned short UInt16;
#endif #endif
#ifndef BZ_NO_STDIO #ifndef BZ_NO_STDIO
extern void bz__AssertH__fail ( int errcode ); extern void BZ2_bz__AssertH__fail ( int errcode );
#define AssertH(cond,errcode) \ #define AssertH(cond,errcode) \
{ if (!(cond)) bz__AssertH__fail ( errcode ); } { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
#if BZ_DEBUG #if BZ_DEBUG
#define AssertD(cond,msg) \ #define AssertD(cond,msg) \
{ if (!(cond)) { \ { if (!(cond)) { \
@ -155,7 +155,7 @@ extern void bz_internal_error ( int errcode );
/*-- Stuff for randomising repetitive blocks. --*/ /*-- Stuff for randomising repetitive blocks. --*/
extern Int32 rNums[512]; extern Int32 BZ2_rNums[512];
#define BZ_RAND_DECLS \ #define BZ_RAND_DECLS \
Int32 rNToGo; \ Int32 rNToGo; \
@ -169,7 +169,7 @@ extern Int32 rNums[512];
#define BZ_RAND_UPD_MASK \ #define BZ_RAND_UPD_MASK \
if (s->rNToGo == 0) { \ if (s->rNToGo == 0) { \
s->rNToGo = rNums[s->rTPos]; \ s->rNToGo = BZ2_rNums[s->rTPos]; \
s->rTPos++; \ s->rTPos++; \
if (s->rTPos == 512) s->rTPos = 0; \ if (s->rTPos == 512) s->rTPos = 0; \
} \ } \
@ -179,7 +179,7 @@ extern Int32 rNums[512];
/*-- Stuff for doing CRCs. --*/ /*-- Stuff for doing CRCs. --*/
extern UInt32 crc32Table[256]; extern UInt32 BZ2_crc32Table[256];
#define BZ_INITIALISE_CRC(crcVar) \ #define BZ_INITIALISE_CRC(crcVar) \
{ \ { \
@ -194,7 +194,7 @@ extern UInt32 crc32Table[256];
#define BZ_UPDATE_CRC(crcVar,cha) \ #define BZ_UPDATE_CRC(crcVar,cha) \
{ \ { \
crcVar = (crcVar << 8) ^ \ crcVar = (crcVar << 8) ^ \
crc32Table[(crcVar >> 24) ^ \ BZ2_crc32Table[(crcVar >> 24) ^ \
((UChar)cha)]; \ ((UChar)cha)]; \
} }
@ -241,7 +241,7 @@ typedef
/* aliases for arr1 and arr2 */ /* aliases for arr1 and arr2 */
UInt32* ptr; UInt32* ptr;
UInt16* block; UChar* block;
UInt16* mtfv; UInt16* mtfv;
UChar* zbits; UChar* zbits;
@ -285,7 +285,9 @@ typedef
UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
/* second dimension: only 3 needed; 4 makes index calculations faster */
UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
} }
EState; EState;
@ -295,19 +297,19 @@ typedef
/*-- externs for compression. --*/ /*-- externs for compression. --*/
extern void extern void
blockSort ( EState* ); BZ2_blockSort ( EState* );
extern void extern void
compressBlock ( EState*, Bool ); BZ2_compressBlock ( EState*, Bool );
extern void extern void
bsInitWrite ( EState* ); BZ2_bsInitWrite ( EState* );
extern void extern void
hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
extern void extern void
hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
@ -494,20 +496,20 @@ typedef
(((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
#define BZ_GET_SMALL(cccc) \ #define BZ_GET_SMALL(cccc) \
cccc = indexIntoF ( s->tPos, s->cftab ); \ cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
s->tPos = GET_LL(s->tPos); s->tPos = GET_LL(s->tPos);
/*-- externs for decompression. --*/ /*-- externs for decompression. --*/
extern Int32 extern Int32
indexIntoF ( Int32, Int32* ); BZ2_indexIntoF ( Int32, Int32* );
extern Int32 extern Int32
decompress ( DState* ); BZ2_decompress ( DState* );
extern void extern void
hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
Int32, Int32, Int32 ); Int32, Int32, Int32 );

View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -78,7 +78,7 @@
/*---------------------------------------------------*/ /*---------------------------------------------------*/
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void bsInitWrite ( EState* s ) void BZ2_bsInitWrite ( EState* s )
{ {
s->bsLive = 0; s->bsLive = 0;
s->bsBuff = 0; s->bsBuff = 0;
@ -113,6 +113,7 @@ void bsFinishWrite ( EState* s )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
static static
__inline__
void bsW ( EState* s, Int32 n, UInt32 v ) void bsW ( EState* s, Int32 n, UInt32 v )
{ {
bsNEEDW ( n ); bsNEEDW ( n );
@ -164,8 +165,6 @@ void generateMTFValues ( EState* s )
{ {
UChar yy[256]; UChar yy[256];
Int32 i, j; Int32 i, j;
UChar tmp;
UChar tmp2;
Int32 zPend; Int32 zPend;
Int32 wr; Int32 wr;
Int32 EOB; Int32 EOB;
@ -174,7 +173,7 @@ void generateMTFValues ( EState* s )
After sorting (eg, here), After sorting (eg, here),
s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
and and
((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8] ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
holds the original block data. holds the original block data.
The first thing to do is generate the MTF values, The first thing to do is generate the MTF values,
@ -186,14 +185,14 @@ void generateMTFValues ( EState* s )
The final compressed bitstream is generated into the The final compressed bitstream is generated into the
area starting at area starting at
(UChar*) (&((UInt16)s->arr2)[s->nblock]) (UChar*) (&((UChar*)s->arr2)[s->nblock])
These storage aliases are set up in bzCompressInit(), These storage aliases are set up in bzCompressInit(),
except for the last one, which is arranged in except for the last one, which is arranged in
compressBlock(). compressBlock().
*/ */
UInt32* ptr = s->ptr; UInt32* ptr = s->ptr;
UInt16* block = s->block; UChar* block = s->block;
UInt16* mtfv = s->mtfv; UInt16* mtfv = s->mtfv;
makeMaps_e ( s ); makeMaps_e ( s );
@ -207,27 +206,14 @@ void generateMTFValues ( EState* s )
for (i = 0; i < s->nblock; i++) { for (i = 0; i < s->nblock; i++) {
UChar ll_i; UChar ll_i;
AssertD ( wr <= i, "generateMTFValues(1)" ); AssertD ( wr <= i, "generateMTFValues(1)" );
j = ptr[i]-1; if (j < 0) j += s->nblock; j = ptr[i]-1; if (j < 0) j += s->nblock;
ll_i = s->unseqToSeq[block[j] >> 8]; ll_i = s->unseqToSeq[block[j]];
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
tmp = yy[0]; if (yy[0] == ll_i) {
if (tmp == ll_i) {
zPend++; zPend++;
} else { } else {
tmp2 = tmp;
tmp = yy[1];
yy[1] = tmp2;
j = 1;
while ( ll_i != tmp ) {
j++;
tmp2 = tmp;
tmp = yy[j];
yy[j] = tmp2;
};
yy[0] = tmp;
if (zPend > 0) { if (zPend > 0) {
zPend--; zPend--;
@ -244,8 +230,27 @@ void generateMTFValues ( EState* s )
}; };
zPend = 0; zPend = 0;
} }
{
register UChar rtmp;
register UChar* ryy_j;
register UChar rll_i;
rtmp = yy[1];
yy[1] = yy[0];
ryy_j = &(yy[1]);
rll_i = ll_i;
while ( rll_i != rtmp ) {
register UChar rtmp2;
ryy_j++;
rtmp2 = rtmp;
rtmp = *ryy_j;
*ryy_j = rtmp2;
};
yy[0] = rtmp;
j = ryy_j - &(yy[0]);
mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
} }
}
} }
if (zPend > 0) { if (zPend > 0) {
@ -261,6 +266,7 @@ void generateMTFValues ( EState* s )
if (zPend < 2) break; if (zPend < 2) break;
zPend = (zPend - 2) / 2; zPend = (zPend - 2) / 2;
}; };
zPend = 0;
} }
mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
@ -365,6 +371,18 @@ void sendMTFValues ( EState* s )
for (v = 0; v < alphaSize; v++) for (v = 0; v < alphaSize; v++)
s->rfreq[t][v] = 0; s->rfreq[t][v] = 0;
/*---
Set up an auxiliary length table which is used to fast-track
the common case (nGroups == 6).
---*/
if (nGroups == 6) {
for (v = 0; v < alphaSize; v++) {
s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
}
}
nSelectors = 0; nSelectors = 0;
totc = 0; totc = 0;
gs = 0; gs = 0;
@ -381,21 +399,37 @@ void sendMTFValues ( EState* s )
--*/ --*/
for (t = 0; t < nGroups; t++) cost[t] = 0; for (t = 0; t < nGroups; t++) cost[t] = 0;
if (nGroups == 6) { if (nGroups == 6 && 50 == ge-gs+1) {
register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; /*--- fast track the common case ---*/
cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; register UInt32 cost01, cost23, cost45;
for (i = gs; i <= ge; i++) { register UInt16 icv;
UInt16 icv = mtfv[i]; cost01 = cost23 = cost45 = 0;
cost0 += s->len[0][icv];
cost1 += s->len[1][icv]; # define BZ_ITER(nn) \
cost2 += s->len[2][icv]; icv = mtfv[gs+(nn)]; \
cost3 += s->len[3][icv]; cost01 += s->len_pack[icv][0]; \
cost4 += s->len[4][icv]; cost23 += s->len_pack[icv][1]; \
cost5 += s->len[5][icv]; cost45 += s->len_pack[icv][2]; \
}
cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
# undef BZ_ITER
cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
} else { } else {
/*--- slow version which correctly handles all situations ---*/
for (i = gs; i <= ge; i++) { for (i = gs; i <= ge; i++) {
UInt16 icv = mtfv[i]; UInt16 icv = mtfv[i];
for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
@ -417,8 +451,29 @@ void sendMTFValues ( EState* s )
/*-- /*--
Increment the symbol frequencies for the selected table. Increment the symbol frequencies for the selected table.
--*/ --*/
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
# undef BZ_ITUR
} else {
/*--- slow version which correctly handles all situations ---*/
for (i = gs; i <= ge; i++) for (i = gs; i <= ge; i++)
s->rfreq[bt][ mtfv[i] ]++; s->rfreq[bt][ mtfv[i] ]++;
}
gs = ge+1; gs = ge+1;
} }
@ -434,7 +489,7 @@ void sendMTFValues ( EState* s )
Recompute the tables based on the accumulated frequencies. Recompute the tables based on the accumulated frequencies.
--*/ --*/
for (t = 0; t < nGroups; t++) for (t = 0; t < nGroups; t++)
hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
alphaSize, 20 ); alphaSize, 20 );
} }
@ -474,7 +529,7 @@ void sendMTFValues ( EState* s )
} }
AssertH ( !(maxLen > 20), 3004 ); AssertH ( !(maxLen > 20), 3004 );
AssertH ( !(minLen < 1), 3005 ); AssertH ( !(minLen < 1), 3005 );
hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
minLen, maxLen, alphaSize ); minLen, maxLen, alphaSize );
} }
@ -536,12 +591,44 @@ void sendMTFValues ( EState* s )
if (gs >= s->nMTF) break; if (gs >= s->nMTF) break;
ge = gs + BZ_G_SIZE - 1; ge = gs + BZ_G_SIZE - 1;
if (ge >= s->nMTF) ge = s->nMTF-1; if (ge >= s->nMTF) ge = s->nMTF-1;
for (i = gs; i <= ge; i++) {
AssertH ( s->selector[selCtr] < nGroups, 3006 ); AssertH ( s->selector[selCtr] < nGroups, 3006 );
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
UInt16 mtfv_i;
UChar* s_len_sel_selCtr
= &(s->len[s->selector[selCtr]][0]);
Int32* s_code_sel_selCtr
= &(s->code[s->selector[selCtr]][0]);
# define BZ_ITAH(nn) \
mtfv_i = mtfv[gs+(nn)]; \
bsW ( s, \
s_len_sel_selCtr[mtfv_i], \
s_code_sel_selCtr[mtfv_i] )
BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
# undef BZ_ITAH
} else {
/*--- slow version which correctly handles all situations ---*/
for (i = gs; i <= ge; i++) {
bsW ( s, bsW ( s,
s->len [s->selector[selCtr]] [mtfv[i]], s->len [s->selector[selCtr]] [mtfv[i]],
s->code [s->selector[selCtr]] [mtfv[i]] ); s->code [s->selector[selCtr]] [mtfv[i]] );
} }
}
gs = ge+1; gs = ge+1;
selCtr++; selCtr++;
@ -554,7 +641,7 @@ void sendMTFValues ( EState* s )
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void compressBlock ( EState* s, Bool is_last_block ) void BZ2_compressBlock ( EState* s, Bool is_last_block )
{ {
if (s->nblock > 0) { if (s->nblock > 0) {
@ -568,14 +655,14 @@ void compressBlock ( EState* s, Bool is_last_block )
"combined CRC = 0x%8x, size = %d\n", "combined CRC = 0x%8x, size = %d\n",
s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
blockSort ( s ); BZ2_blockSort ( s );
} }
s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]); s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
/*-- If this is the first block, create the stream header. --*/ /*-- If this is the first block, create the stream header. --*/
if (s->blockNo == 1) { if (s->blockNo == 1) {
bsInitWrite ( s ); BZ2_bsInitWrite ( s );
bsPutUChar ( s, 'B' ); bsPutUChar ( s, 'B' );
bsPutUChar ( s, 'Z' ); bsPutUChar ( s, 'Z' );
bsPutUChar ( s, 'h' ); bsPutUChar ( s, 'h' );

View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -68,7 +68,7 @@
comp.compression FAQ. comp.compression FAQ.
--*/ --*/
UInt32 crc32Table[256] = { UInt32 BZ2_crc32Table[256] = {
/*-- Ugly, innit? --*/ /*-- Ugly, innit? --*/

View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -99,7 +99,9 @@ void makeMaps_d ( DState* s )
s->bsLive += 8; \ s->bsLive += 8; \
s->strm->next_in++; \ s->strm->next_in++; \
s->strm->avail_in--; \ s->strm->avail_in--; \
s->strm->total_in++; \ s->strm->total_in_lo32++; \
if (s->strm->total_in_lo32 == 0) \
s->strm->total_in_hi32++; \
} }
#define GET_UCHAR(lll,uuu) \ #define GET_UCHAR(lll,uuu) \
@ -113,6 +115,8 @@ void makeMaps_d ( DState* s )
{ \ { \
if (groupPos == 0) { \ if (groupPos == 0) { \
groupNo++; \ groupNo++; \
if (groupNo >= nSelectors) \
RETURN(BZ_DATA_ERROR); \
groupPos = BZ_G_SIZE; \ groupPos = BZ_G_SIZE; \
gSel = s->selector[groupNo]; \ gSel = s->selector[groupNo]; \
gMinlen = s->minLens[gSel]; \ gMinlen = s->minLens[gSel]; \
@ -123,17 +127,23 @@ void makeMaps_d ( DState* s )
groupPos--; \ groupPos--; \
zn = gMinlen; \ zn = gMinlen; \
GET_BITS(label1, zvec, zn); \ GET_BITS(label1, zvec, zn); \
while (zvec > gLimit[zn]) { \ while (1) { \
if (zn > 20 /* the longest code */) \
RETURN(BZ_DATA_ERROR); \
if (zvec <= gLimit[zn]) break; \
zn++; \ zn++; \
GET_BIT(label2, zj); \ GET_BIT(label2, zj); \
zvec = (zvec << 1) | zj; \ zvec = (zvec << 1) | zj; \
}; \ }; \
if (zvec - gBase[zn] < 0 \
|| zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
RETURN(BZ_DATA_ERROR); \
lval = gPerm[zvec - gBase[zn]]; \ lval = gPerm[zvec - gBase[zn]]; \
} }
/*---------------------------------------------------*/ /*---------------------------------------------------*/
Int32 decompress ( DState* s ) Int32 BZ2_decompress ( DState* s )
{ {
UChar uc; UChar uc;
Int32 retVal; Int32 retVal;
@ -288,6 +298,11 @@ Int32 decompress ( DState* s )
GET_UCHAR(BZ_X_ORIGPTR_3, uc); GET_UCHAR(BZ_X_ORIGPTR_3, uc);
s->origPtr = (s->origPtr << 8) | ((Int32)uc); s->origPtr = (s->origPtr << 8) | ((Int32)uc);
if (s->origPtr < 0)
RETURN(BZ_DATA_ERROR);
if (s->origPtr > 10 + 100000*s->blockSize100k)
RETURN(BZ_DATA_ERROR);
/*--- Receive the mapping table ---*/ /*--- Receive the mapping table ---*/
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
GET_BIT(BZ_X_MAPPING_1, uc); GET_BIT(BZ_X_MAPPING_1, uc);
@ -305,18 +320,21 @@ Int32 decompress ( DState* s )
if (uc == 1) s->inUse[i * 16 + j] = True; if (uc == 1) s->inUse[i * 16 + j] = True;
} }
makeMaps_d ( s ); makeMaps_d ( s );
if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
alphaSize = s->nInUse+2; alphaSize = s->nInUse+2;
/*--- Now the selectors ---*/ /*--- Now the selectors ---*/
GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
for (i = 0; i < nSelectors; i++) { for (i = 0; i < nSelectors; i++) {
j = 0; j = 0;
while (True) { while (True) {
GET_BIT(BZ_X_SELECTOR_3, uc); GET_BIT(BZ_X_SELECTOR_3, uc);
if (uc == 0) break; if (uc == 0) break;
j++; j++;
if (j > 5) RETURN(BZ_DATA_ERROR); if (j >= nGroups) RETURN(BZ_DATA_ERROR);
} }
s->selectorMtf[i] = j; s->selectorMtf[i] = j;
} }
@ -358,7 +376,7 @@ Int32 decompress ( DState* s )
if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
if (s->len[t][i] < minLen) minLen = s->len[t][i]; if (s->len[t][i] < minLen) minLen = s->len[t][i];
} }
hbCreateDecodeTables ( BZ2_hbCreateDecodeTables (
&(s->limit[t][0]), &(s->limit[t][0]),
&(s->base[t][0]), &(s->base[t][0]),
&(s->perm[t][0]), &(s->perm[t][0]),
@ -392,7 +410,6 @@ Int32 decompress ( DState* s )
/*-- end MTF init --*/ /*-- end MTF init --*/
nblock = 0; nblock = 0;
GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
while (True) { while (True) {
@ -417,23 +434,24 @@ Int32 decompress ( DState* s )
if (s->smallDecompress) if (s->smallDecompress)
while (es > 0) { while (es > 0) {
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->ll16[nblock] = (UInt16)uc; s->ll16[nblock] = (UInt16)uc;
nblock++; nblock++;
es--; es--;
} }
else else
while (es > 0) { while (es > 0) {
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->tt[nblock] = (UInt32)uc; s->tt[nblock] = (UInt32)uc;
nblock++; nblock++;
es--; es--;
}; };
if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR);
continue; continue;
} else { } else {
if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR); if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
/*-- uc = MTF ( nextSym-1 ) --*/ /*-- uc = MTF ( nextSym-1 ) --*/
{ {
@ -500,6 +518,12 @@ Int32 decompress ( DState* s )
} }
} }
/* Now we know what nblock is, we can do a better sanity
check on s->origPtr.
*/
if (s->origPtr < 0 || s->origPtr >= nblock)
RETURN(BZ_DATA_ERROR);
s->state_out_len = 0; s->state_out_len = 0;
s->state_out_ch = 0; s->state_out_ch = 0;
BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); BZ_INITIALISE_CRC ( s->calculatedBlockCRC );

View File

@ -19,6 +19,8 @@
#ifdef _WIN32 #ifdef _WIN32
#define BZ2_LIBNAME "libbz2-1.0.0.DLL"
#include <windows.h> #include <windows.h>
static int BZ2DLLLoaded = 0; static int BZ2DLLLoaded = 0;
static HINSTANCE BZ2DLLhLib; static HINSTANCE BZ2DLLhLib;
@ -27,21 +29,28 @@ int BZ2DLLLoadLibrary(void)
HINSTANCE hLib; HINSTANCE hLib;
if(BZ2DLLLoaded==1){return 0;} if(BZ2DLLLoaded==1){return 0;}
hLib=LoadLibrary("libbz2.dll"); hLib=LoadLibrary(BZ2_LIBNAME);
if(hLib == NULL){ if(hLib == NULL){
puts("Can't load libbz2.dll"); fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME);
return -1;
}
BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion");
BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen");
BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen");
BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread");
BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite");
BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush");
BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose");
BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror");
if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen
|| !BZ2_bzread || !BZ2_bzwrite || !BZ2_bzflush
|| !BZ2_bzclose || !BZ2_bzerror) {
fprintf(stderr,"GetProcAddress failed.\n");
return -1; return -1;
} }
BZ2DLLLoaded=1; BZ2DLLLoaded=1;
BZ2DLLhLib=hLib; BZ2DLLhLib=hLib;
bzlibVersion=GetProcAddress(hLib,"bzlibVersion");
bzopen=GetProcAddress(hLib,"bzopen");
bzdopen=GetProcAddress(hLib,"bzdopen");
bzread=GetProcAddress(hLib,"bzread");
bzwrite=GetProcAddress(hLib,"bzwrite");
bzflush=GetProcAddress(hLib,"bzflush");
bzclose=GetProcAddress(hLib,"bzclose");
bzerror=GetProcAddress(hLib,"bzerror");
return 0; return 0;
} }
@ -67,9 +76,11 @@ int main(int argc,char *argv[])
#ifdef _WIN32 #ifdef _WIN32
if(BZ2DLLLoadLibrary()<0){ if(BZ2DLLLoadLibrary()<0){
puts("can't load dll"); fprintf(stderr,"Loading of %s failed. Giving up.\n", BZ2_LIBNAME);
exit(1); exit(1);
} }
printf("Loading of %s succeeded. Library version is %s.\n",
BZ2_LIBNAME, BZ2_bzlibVersion() );
#endif #endif
while(++argv,--argc){ while(++argv,--argc){
if(**argv =='-' || **argv=='/'){ if(**argv =='-' || **argv=='/'){
@ -119,15 +130,15 @@ int main(int argc,char *argv[])
}else{ }else{
fp_w = stdout; fp_w = stdout;
} }
if((BZ2fp_r == NULL && (BZ2fp_r = bzdopen(fileno(stdin),"rb"))==NULL) if((BZ2fp_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL)
|| (BZ2fp_r != NULL && (BZ2fp_r = bzopen(fn_r,"rb"))==NULL)){ || (BZ2fp_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){
printf("can't bz2openstream\n"); printf("can't bz2openstream\n");
exit(1); exit(1);
} }
while((len=bzread(BZ2fp_r,buff,0x1000))>0){ while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){
fwrite(buff,1,len,fp_w); fwrite(buff,1,len,fp_w);
} }
bzclose(BZ2fp_r); BZ2_bzclose(BZ2fp_r);
if(fp_w != stdout) fclose(fp_w); if(fp_w != stdout) fclose(fp_w);
}else{ }else{
BZFILE *BZ2fp_w = NULL; BZFILE *BZ2fp_w = NULL;
@ -146,15 +157,15 @@ int main(int argc,char *argv[])
mode[1] = '0' + level; mode[1] = '0' + level;
mode[2] = '\0'; mode[2] = '\0';
if((fn_w == NULL && (BZ2fp_w = bzdopen(fileno(stdout),mode))==NULL) if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL)
|| (fn_w !=NULL && (BZ2fp_w = bzopen(fn_w,mode))==NULL)){ || (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){
printf("can't bz2openstream\n"); printf("can't bz2openstream\n");
exit(1); exit(1);
} }
while((len=fread(buff,1,0x1000,fp_r))>0){ while((len=fread(buff,1,0x1000,fp_r))>0){
bzwrite(BZ2fp_w,buff,len); BZ2_bzwrite(BZ2fp_w,buff,len);
} }
bzclose(BZ2fp_w); BZ2_bzclose(BZ2fp_w);
if(fp_r!=stdin)fclose(fp_r); if(fp_r!=stdin)fclose(fp_r);
} }
} }

View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -100,7 +100,7 @@
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void hbMakeCodeLengths ( UChar *len, void BZ2_hbMakeCodeLengths ( UChar *len,
Int32 *freq, Int32 *freq,
Int32 alphaSize, Int32 alphaSize,
Int32 maxLen ) Int32 maxLen )
@ -172,7 +172,7 @@ void hbMakeCodeLengths ( UChar *len,
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void hbAssignCodes ( Int32 *code, void BZ2_hbAssignCodes ( Int32 *code,
UChar *length, UChar *length,
Int32 minLen, Int32 minLen,
Int32 maxLen, Int32 maxLen,
@ -190,7 +190,7 @@ void hbAssignCodes ( Int32 *code,
/*---------------------------------------------------*/ /*---------------------------------------------------*/
void hbCreateDecodeTables ( Int32 *limit, void BZ2_hbCreateDecodeTables ( Int32 *limit,
Int32 *base, Int32 *base,
Int32 *perm, Int32 *perm,
UChar *length, UChar *length,

View File

@ -1,25 +1,27 @@
LIBRARY LIBBZ2 LIBRARY LIBBZ2
DESCRIPTION "libbzip2: library for data compression" DESCRIPTION "libbzip2: library for data compression"
EXPORTS EXPORTS
bzCompressInit BZ2_bzCompressInit
bzCompress BZ2_bzCompress
bzCompressEnd BZ2_bzCompressEnd
bzDecompressInit BZ2_bzDecompressInit
bzDecompress BZ2_bzDecompress
bzDecompressEnd BZ2_bzDecompressEnd
bzReadOpen BZ2_bzReadOpen
bzReadClose BZ2_bzReadClose
bzReadGetUnused BZ2_bzReadGetUnused
bzRead BZ2_bzRead
bzWriteOpen BZ2_bzWriteOpen
bzWrite BZ2_bzWrite
bzWriteClose BZ2_bzWriteClose
bzBuffToBuffCompress BZ2_bzWriteClose64
bzBuffToBuffDecompress BZ2_bzBuffToBuffCompress
bzlibVersion BZ2_bzBuffToBuffDecompress
bzopen BZ2_bzlibVersion
bzdopen BZ2_bzopen
bzread BZ2_bzdopen
bzwrite BZ2_bzread
bzflush BZ2_bzwrite
bzclose BZ2_bzflush
BZ2_bzclose
BZ2_bzerror

View File

@ -4,7 +4,7 @@
# Fixed up by JRS for bzip2-0.9.5d release. # Fixed up by JRS for bzip2-0.9.5d release.
CC=cl CC=cl
CFLAGS= -DWIN32 -MD -Ox CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64
OBJS= blocksort.obj \ OBJS= blocksort.obj \
huffman.obj \ huffman.obj \
@ -21,7 +21,6 @@ bzip2: lib
$(CC) $(CFLAGS) -o bzip2recover bzip2recover.c $(CC) $(CFLAGS) -o bzip2recover bzip2recover.c
lib: $(OBJS) lib: $(OBJS)
del libbz2.lib
lib /out:libbz2.lib $(OBJS) lib /out:libbz2.lib $(OBJS)
test: bzip2 test: bzip2
@ -32,20 +31,19 @@ test: bzip2
.\\bzip2 -d < sample1.bz2 > sample1.tst .\\bzip2 -d < sample1.bz2 > sample1.tst
.\\bzip2 -d < sample2.bz2 > sample2.tst .\\bzip2 -d < sample2.bz2 > sample2.tst
.\\bzip2 -ds < sample3.bz2 > sample3.tst .\\bzip2 -ds < sample3.bz2 > sample3.tst
@echo All six of the fc's should find no differences.
@echo If fc finds an error on sample3.bz2, this could be
@echo because WinZip's 'TAR file smart CR/LF conversion'
@echo is too clever for its own good. Disable this option.
@echo The correct size for sample3.ref is 120,244. If it
@echo is 150,251, WinZip has messed it up.
fc sample1.bz2 sample1.rb2 fc sample1.bz2 sample1.rb2
fc sample2.bz2 sample2.rb2 fc sample2.bz2 sample2.rb2
fc sample3.bz2 sample3.rb2 fc sample3.bz2 sample3.rb2
fc sample1.tst sample1.ref fc sample1.tst sample1.ref
fc sample2.tst sample2.ref fc sample2.tst sample2.ref
fc sample3.tst sample3.ref fc sample3.tst sample3.ref
@echo All six of the fc's should find no differences.
@echo If fc finds an error on sample3.tst, this could be
@echo because WinZips 'TAR file smart CR/LF conversion'
@echo is too clever for its own good. Disable this option.
@echo The correct size for sample3.ref is 120,244. If it
@echo is around 150k, WinZip has stuffed it up.
@echo Also remember to set BZ_UNIX to 0 and BZ_LCCWIN32
@echo to 1 in bzip2.c.
clean: clean:

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression. library for lossless, block-sorting data compression.
Copyright (C) 1996-1999 Julian R Seward. All rights reserved. Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK. Julian Seward, Cambridge, UK.
jseward@acm.org jseward@acm.org
bzip2/libbzip2 version 0.9.5 of 24 May 1999 bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of: This program is based on (at least) the work of:
Mike Burrows Mike Burrows
@ -63,7 +63,7 @@
/*---------------------------------------------*/ /*---------------------------------------------*/
Int32 rNums[512] = { Int32 BZ2_rNums[512] = {
619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,

39
spewG.c Normal file
View File

@ -0,0 +1,39 @@
/* spew out a thoroughly gigantic file designed so that bzip2
can compress it reasonably rapidly. This is to help test
support for large files (> 2GB) in a reasonable amount of time.
I suggest you use the undocumented --exponential option to
bzip2 when compressing the resulting file; this saves a bit of
time. Note: *don't* bother with --exponential when compressing
Real Files; it'll just waste a lot of CPU time :-)
(but is otherwise harmless).
*/
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <stdlib.h>
/* The number of megabytes of junk to spew out (roughly) */
#define MEGABYTES 5000
/* Size in bytes of the array below. */
#define N_BUF 1000000
/* Storage that main() hands to stdout to use as its I/O buffer. */
char buf[N_BUF];
/* Write a very large, highly repetitive stream to stdout: for each
   outer iteration, p copies of a line of 'a's, p-1 copies of a line of
   'b's and p+1 copies of a line of 'c's, where p is drawn from
   random() after seeding with the constant 1.  The outer loop bound is
   MEGABYTES * 515 stepped by 3.

   Returns 0 on success, or 1 if writing to stdout failed (for example
   because the destination filled up). */
int main ( int argc, char** argv )
{
   int ii, kk, p;

   (void)argc;
   (void)argv;

   srandom(1);

   /* setvbuf() is the ISO C spelling of the BSD-only setbuffer():
      fully buffer stdout through the N_BUF-byte array buf. */
   setvbuf ( stdout, buf, _IOFBF, N_BUF );

   for (kk = 0; kk < MEGABYTES * 515; kk+=3) {
      p = 25+random()%50;
      for (ii = 0; ii < p; ii++)
         fputs ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", stdout );
      for (ii = 0; ii < p-1; ii++)
         fputs ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", stdout );
      for (ii = 0; ii < p+1; ii++)
         fputs ( "ccccccccccccccccccccccccccccccccccccc", stdout );

      /* No point generating further gigabytes once a write has failed. */
      if (ferror ( stdout )) break;
   }

   if (fflush ( stdout ) != 0 || ferror ( stdout )) {
      fprintf ( stderr, "spewG: error writing to stdout\n" );
      return 1;
   }
   return 0;
}

126
unzcrash.c Normal file
View File

@ -0,0 +1,126 @@
/* A test program written to test robustness to decompression of
corrupted data. Usage is
unzcrash filename
and the program will read the specified file, compress it (in memory),
and then repeatedly decompress it, each time with a different bit of
the compressed data inverted, so as to test all possible one-bit errors.
This should not cause any invalid memory accesses. If it does,
I want to know about it!
p.s. As you can see from the above description, the process is
incredibly slow. A file of size eg 5KB will cause it to run for
many hours.
*/
#include <stdio.h>
#include <assert.h>
#include "bzlib.h"
/* Maximum number of input bytes main() reads from the file. */
#define M_BLOCK 1000000
typedef unsigned char uchar;
/* Capacity offered to the decompressor for its output. */
#define M_BLOCK_OUT (M_BLOCK + 1000000)
/* Uncompressed data as read from the input file. */
uchar inbuf[M_BLOCK];
/* Destination for each trial decompression. */
uchar outbuf[M_BLOCK_OUT];
/* Compressed image of inbuf; this is the buffer whose bits get flipped.
   Declared with M_BLOCK bytes plus 600 plus 1% of M_BLOCK. */
uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];
/* Byte counts: input read, decompressed output, compressed data. */
int nIn, nOut, nZ;
/* Printable names for bzlib return codes.  The table is indexed by the
   negated return code (main() looks up bzerrorstrings[-r]), so entry 0
   is the success case.  The trailing "???" entries are placeholders
   reserved for codes that may be added in future.  The strings are
   literals, hence the const qualification. */
static const char * const bzerrorstrings[] = {
   "OK",
   "SEQUENCE_ERROR",
   "PARAM_ERROR",
   "MEM_ERROR",
   "DATA_ERROR",
   "DATA_ERROR_MAGIC",
   "IO_ERROR",
   "UNEXPECTED_EOF",
   "OUTBUFF_FULL",
   "???",   /* for future */
   "???",   /* for future */
   "???",   /* for future */
   "???",   /* for future */
   "???",   /* for future */
   "???"    /* for future */
};
void flip_bit ( int bit )
{
int byteno = bit / 8;
int bitno = bit % 8;
uchar mask = 1 << bitno;
//fprintf ( stderr, "(byte %d bit %d mask %d)",
// byteno, bitno, (int)mask );
zbuf[byteno] ^= mask;
}
/* Robustness driver.  Reads at most M_BLOCK bytes from the file named
   on the command line, compresses them in memory into zbuf, then for
   every bit of the compressed image: inverts that bit, decompresses
   into outbuf, reports the return code on stderr, and restores the bit.
   Whenever decompression reports BZ_OK the output must match the input
   exactly, otherwise the run stops.

   Returns 0 once every bit has been tried; 1 on a usage error, a
   failure to open or read the file, a compression failure, or an
   output mismatch. */
int main ( int argc, char** argv )
{
   FILE* f;
   int   r;
   int   bit;
   int   i;

   if (argc != 2) {
      fprintf ( stderr, "usage: unzcrash filename\n" );
      return 1;
   }

   /* Binary mode: the input is arbitrary data, and text mode would
      translate line endings on platforms that distinguish the two. */
   f = fopen ( argv[1], "rb" );
   if (!f) {
      fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] );
      return 1;
   }
   nIn = fread ( inbuf, 1, M_BLOCK, f );
   if (ferror ( f )) {
      fprintf ( stderr, "unzcrash: error reading %s\n", argv[1] );
      fclose ( f );
      return 1;
   }
   fclose ( f );
   fprintf ( stderr, "%d bytes read\n", nIn );

   /* Offer the compressor all of zbuf.  The array is declared with the
      extra 1% + 600 bytes of headroom, so advertising only M_BLOCK
      would make incompressible input of nearly M_BLOCK bytes fail
      for lack of output space. */
   nZ = (int) sizeof ( zbuf );
   r = BZ2_bzBuffToBuffCompress (
          (char*)zbuf, (unsigned int*)&nZ,
          (char*)inbuf, (unsigned int)nIn, 9, 0, 30 );
   /* Explicit check rather than assert(), which disappears under NDEBUG. */
   if (r != BZ_OK) {
      fprintf ( stderr, "unzcrash: compression failed (%d)\n", r );
      return 1;
   }
   fprintf ( stderr, "%d after compression\n", nZ );

   for (bit = 0; bit < nZ*8; bit++) {
      fprintf ( stderr, "bit %d ", bit );
      flip_bit ( bit );
      nOut = M_BLOCK_OUT;
      r = BZ2_bzBuffToBuffDecompress (
             (char*)outbuf, (unsigned int*)&nOut,
             (char*)zbuf, (unsigned int)nZ, 0, 0 );
      fprintf ( stderr, " %d %s ", r, bzerrorstrings[-r] );

      if (r != BZ_OK) {
         /* The corruption was detected; nothing more to verify. */
         fprintf ( stderr, "\n" );
      } else {
         /* Decompression claimed success, so the result must be a
            byte-for-byte copy of the original input. */
         if (nOut != nIn) {
            fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut );
            return 1;
         } else {
            for (i = 0; i < nOut; i++)
               if (inbuf[i] != outbuf[i]) {
                  fprintf(stderr, "mismatch at %d\n", i );
                  return 1;
               }
            if (i == nOut) fprintf(stderr, "really ok!\n" );
         }
      }

      /* Undo the corruption before moving on to the next bit. */
      flip_bit ( bit );
   }

   fprintf ( stderr, "all ok\n" );
   return 0;
}

5
words0 Normal file
View File

@ -0,0 +1,5 @@
If compilation produces errors, or a large number of warnings,
please read README.COMPILATION.PROBLEMS -- you might be able to
adjust the flags in this Makefile to improve matters.