bzip2/bzip2.c
1997-08-29 22:13:13 +02:00

4045 lines
115 KiB
C

/*-----------------------------------------------------------*/
/*--- A block-sorting, lossless compressor bzip2.c ---*/
/*-----------------------------------------------------------*/
/*--
This program is bzip2, a lossless, block-sorting data compressor,
version 0.1pl2, dated 29-Aug-1997.
Copyright (C) 1996, 1997 by Julian Seward.
Guildford, Surrey, UK
email: jseward@acm.org
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
The GNU General Public License is contained in the file LICENSE.
This program is based on (at least) the work of:
Mike Burrows
David Wheeler
Peter Fenwick
Alistair Moffat
Radford Neal
Ian H. Witten
Robert Sedgewick
Jon L. Bentley
For more information on these sources, see the file ALGORITHMS.
--*/
/*----------------------------------------------------*/
/*--- IMPORTANT ---*/
/*----------------------------------------------------*/
/*--
WARNING:
This program (attempts to) compress data by performing several
non-trivial transformations on it. Unless you are 100% familiar
with *all* the algorithms contained herein, and with the
consequences of modifying them, you should NOT meddle with the
compression or decompression machinery. Incorrect changes can
and very likely *will* lead to disasterous loss of data.
DISCLAIMER:
I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
USE OF THIS PROGRAM, HOWSOEVER CAUSED.
Every compression of a file implies an assumption that the
compressed file can be decompressed to reproduce the original.
Great efforts in design, coding and testing have been made to
ensure that this program works correctly. However, the
complexity of the algorithms, and, in particular, the presence
of various special cases in the code which occur with very low
but non-zero probability make it impossible to rule out the
possibility of bugs remaining in the program. DO NOT COMPRESS
ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE
POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
That is not to say this program is inherently unreliable.
Indeed, I very much hope the opposite is true. bzip2 has been
carefully constructed and extensively tested.
PATENTS:
To the best of my knowledge, bzip2 does not use any patented
algorithms. However, I do not have the resources available to
carry out a full patent search. Therefore I cannot give any
guarantee of the above statement.
--*/
/*----------------------------------------------------*/
/*--- and now for something much more pleasant :-) ---*/
/*----------------------------------------------------*/
/*---------------------------------------------*/
/*--
Place a 1 beside your platform, and 0 elsewhere.
--*/
/*--
Generic 32-bit Unix.
Also works on 64-bit Unix boxes.
--*/
#define BZ_UNIX 1
/*--
Win32, as seen by Jacob Navia's excellent
port of (Chris Fraser & David Hanson)'s excellent
lcc compiler.
--*/
#define BZ_LCCWIN32 0
/*---------------------------------------------*/
/*--
Some stuff for all platforms.
--*/
#include <stdio.h>
#include <stdlib.h>
#if DEBUG
#include <assert.h>
#endif
#include <string.h>
#include <signal.h>
#include <math.h>
#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); }
#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); }
#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); }
/*---------------------------------------------*/
/*--
Platform-specific stuff.
--*/
#if BZ_UNIX
#include <sys/types.h>
#include <utime.h>
#include <unistd.h>
#include <malloc.h>
#include <sys/stat.h>
#include <sys/times.h>
#define Int32 int
#define UInt32 unsigned int
#define Char char
#define UChar unsigned char
#define Int16 short
#define UInt16 unsigned short
#define PATH_SEP '/'
#define MY_LSTAT lstat
#define MY_S_IFREG S_ISREG
#define MY_STAT stat
#define APPEND_FILESPEC(root, name) \
root=snocString((root), (name))
#define SET_BINARY_MODE(fd) /**/
/*--
You should try very hard to persuade your C compiler
to inline the bits marked INLINE. Otherwise bzip2 will
run rather slowly. gcc version 2.x is recommended.
--*/
#ifdef __GNUC__
#define INLINE inline
#define NORETURN __attribute__ ((noreturn))
#else
#define INLINE /**/
#define NORETURN /**/
#endif
#endif
#if BZ_LCCWIN32
#include <io.h>
#include <fcntl.h>
#include <sys\stat.h>
#define Int32 int
#define UInt32 unsigned int
#define Int16 short
#define UInt16 unsigned short
#define Char char
#define UChar unsigned char
#define INLINE /**/
#define NORETURN /**/
#define PATH_SEP '\\'
#define MY_LSTAT _stat
#define MY_STAT _stat
#define MY_S_IFREG(x) ((x) & _S_IFREG)
#if 0
/*-- lcc-win32 seems to expand wildcards itself --*/
#define APPEND_FILESPEC(root, spec) \
do { \
if ((spec)[0] == '-') { \
root = snocString((root), (spec)); \
} else { \
struct _finddata_t c_file; \
long hFile; \
hFile = _findfirst((spec), &c_file); \
if ( hFile == -1L ) { \
root = snocString ((root), (spec)); \
} else { \
int anInt = 0; \
while ( anInt == 0 ) { \
root = snocString((root), \
&c_file.name[0]); \
anInt = _findnext(hFile, &c_file); \
} \
} \
} \
} while ( 0 )
#else
#define APPEND_FILESPEC(root, name) \
root = snocString ((root), (name))
#endif
#define SET_BINARY_MODE(fd) \
do { \
int retVal = setmode ( fileno ( fd ), \
O_BINARY ); \
ERROR_IF_MINUS_ONE ( retVal ); \
} while ( 0 )
#endif
/*---------------------------------------------*/
/*--
Some more stuff for all platforms :-)
--*/
#define Bool unsigned char
#define True 1
#define False 0
/*--
IntNative is your platform's `native' int size.
Only here to avoid probs with 64-bit platforms.
--*/
#define IntNative int
/*--
change to 1, or compile with -DDEBUG=1 to debug
--*/
#ifndef DEBUG
#define DEBUG 0
#endif
/*---------------------------------------------------*/
/*--- ---*/
/*---------------------------------------------------*/
/*--
Implementation notes, July 1997
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Memory allocation
~~~~~~~~~~~~~~~~~
All large data structures are allocated on the C heap,
for better or for worse. That includes the various
arrays of pointers, striped words, bytes, frequency
tables and buffers for compression and decompression.
bzip2 can operate at various block-sizes, ranging from
100k to 900k in 100k steps, and it allocates only as
much as it needs to. When compressing, we know from the
command-line options what the block-size is going to be,
so all allocation can be done at start-up; if that
succeeds, there can be no further allocation problems.
Decompression is more complicated. Each compressed file
contains, in its header, a byte indicating the block
size used for compression. This means bzip2 potentially
needs to reallocate memory for each file it deals with,
which in turn opens the possibility for a memory allocation
failure part way through a run of files, by encountering
a file requiring a much larger block size than all the
ones preceding it.
The policy is to simply give up if a memory allocation
failure occurs. During decompression, it would be
possible to move on to subsequent files in the hope that
some might ask for a smaller block size, but the
complications for doing this seem more trouble than they
are worth.
Compressed file formats
~~~~~~~~~~~~~~~~~~~~~~~
[This is now entirely different from both 0.21, and from
any previous Huffman-coded variant of bzip.
See the associated file bzip2.txt for details.]
Error conditions
~~~~~~~~~~~~~~~~
Dealing with error conditions is the least satisfactory
aspect of bzip2. The policy is to try and leave the
filesystem in a consistent state, then quit, even if it
means not processing some of the files mentioned in the
command line. `A consistent state' means that a file
exists either in its compressed or uncompressed form,
but not both. This boils down to the rule `delete the
output file if an error condition occurs, leaving the
input intact'. Input files are only deleted when we can
be pretty sure the output file has been written and
closed successfully.
Errors are a dog because there's so many things to
deal with. The following can happen mid-file, and
require cleaning up.
internal `panics' -- indicating a bug
corrupted or inconsistent compressed file
can't allocate enough memory to decompress this file
I/O error reading/writing/opening/closing
signal catches -- Control-C, SIGTERM, SIGHUP.
Other conditions, primarily pertaining to file names,
can be checked in-between files, which makes dealing
with them easier.
--*/
/*---------------------------------------------------*/
/*--- Misc (file handling) data decls ---*/
/*---------------------------------------------------*/
UInt32 bytesIn, bytesOut;
Int32 verbosity;
Bool keepInputFiles, smallMode, testFailsExist;
UInt32 globalCrc;
Int32 numFileNames, numFilesProcessed;
/*-- source modes; F==file, I==stdin, O==stdout --*/
#define SM_I2O 1
#define SM_F2O 2
#define SM_F2F 3
/*-- operation modes --*/
#define OM_Z 1
#define OM_UNZ 2
#define OM_TEST 3
Int32 opMode;
Int32 srcMode;
Int32 longestFileName;
Char inName[1024];
Char outName[1024];
Char *progName;
Char progNameReally[1024];
FILE *outputHandleJustInCase;
void panic ( Char* ) NORETURN;
void ioError ( void ) NORETURN;
void compressOutOfMemory ( Int32, Int32 ) NORETURN;
void uncompressOutOfMemory ( Int32, Int32 ) NORETURN;
void blockOverrun ( void ) NORETURN;
void badBlockHeader ( void ) NORETURN;
void badBGLengths ( void ) NORETURN;
void crcError ( UInt32, UInt32 ) NORETURN;
void bitStreamEOF ( void ) NORETURN;
void cleanUpAndFail ( Int32 ) NORETURN;
void compressedStreamEOF ( void ) NORETURN;
void* myMalloc ( Int32 );
/*---------------------------------------------------*/
/*--- Data decls for the front end ---*/
/*---------------------------------------------------*/
/*--
The overshoot bytes allow us to avoid most of
the cost of pointer renormalisation during
comparison of rotations in sorting.
The figure of 20 is derived as follows:
qSort3 allows an overshoot of up to 10.
It then calls simpleSort, which calls
fullGtU, also with max overshoot 10.
fullGtU does up to 10 comparisons without
renormalising, giving 10+10 == 20.
--*/
#define NUM_OVERSHOOT_BYTES 20
/*--
These are the main data structures for
the Burrows-Wheeler transform.
--*/
/*--
Pointers to compression and decompression
structures. Set by
allocateCompressStructures and
setDecompressStructureSizes
The structures are always set to be suitable
for a block of size 100000 * blockSize100k.
--*/
UChar *block; /*-- compress --*/
UInt16 *quadrant; /*-- compress --*/
Int32 *zptr; /*-- compress --*/
UInt16 *szptr; /*-- overlays zptr ---*/
Int32 *ftab; /*-- compress --*/
UInt16 *ll16; /*-- small decompress --*/
UChar *ll4; /*-- small decompress --*/
Int32 *tt; /*-- fast decompress --*/
UChar *ll8; /*-- fast decompress --*/
/*--
freq table collected to save a pass over the data
during decompression.
--*/
Int32 unzftab[256];
/*--
index of the last char in the block, so
the block size == last + 1.
--*/
Int32 last;
/*--
index in zptr[] of original string after sorting.
--*/
Int32 origPtr;
/*--
always: in the range 0 .. 9.
The current block size is 100000 * this number.
--*/
Int32 blockSize100k;
/*--
Used when sorting. If too many long comparisons
happen, we stop sorting, randomise the block
slightly, and try again.
--*/
Int32 workFactor;
Int32 workDone;
Int32 workLimit;
Bool blockRandomised;
Bool firstAttempt;
Int32 nBlocksRandomised;
/*---------------------------------------------------*/
/*--- Data decls for the back end ---*/
/*---------------------------------------------------*/
#define MAX_ALPHA_SIZE 258
#define MAX_CODE_LEN 23
#define RUNA 0
#define RUNB 1
#define N_GROUPS 6
#define G_SIZE 50
#define N_ITERS 4
#define MAX_SELECTORS (2 + (900000 / G_SIZE))
Bool inUse[256];
Int32 nInUse;
UChar seqToUnseq[256];
UChar unseqToSeq[256];
UChar selector [MAX_SELECTORS];
UChar selectorMtf[MAX_SELECTORS];
Int32 nMTF;
Int32 mtfFreq[MAX_ALPHA_SIZE];
UChar len [N_GROUPS][MAX_ALPHA_SIZE];
/*-- decompress only --*/
Int32 limit [N_GROUPS][MAX_ALPHA_SIZE];
Int32 base [N_GROUPS][MAX_ALPHA_SIZE];
Int32 perm [N_GROUPS][MAX_ALPHA_SIZE];
Int32 minLens[N_GROUPS];
/*-- compress only --*/
Int32 code [N_GROUPS][MAX_ALPHA_SIZE];
Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
/*---------------------------------------------------*/
/*--- 32-bit CRC grunge ---*/
/*---------------------------------------------------*/
/*--
I think this is an implementation of the AUTODIN-II,
Ethernet & FDDI 32-bit CRC standard. Vaguely derived
from code by Rob Warnock, in Section 51 of the
comp.compression FAQ.
--*/
UInt32 crc32Table[256] = {
/*-- Ugly, innit? --*/
0x00000000UL, 0x04c11db7UL, 0x09823b6eUL, 0x0d4326d9UL,
0x130476dcUL, 0x17c56b6bUL, 0x1a864db2UL, 0x1e475005UL,
0x2608edb8UL, 0x22c9f00fUL, 0x2f8ad6d6UL, 0x2b4bcb61UL,
0x350c9b64UL, 0x31cd86d3UL, 0x3c8ea00aUL, 0x384fbdbdUL,
0x4c11db70UL, 0x48d0c6c7UL, 0x4593e01eUL, 0x4152fda9UL,
0x5f15adacUL, 0x5bd4b01bUL, 0x569796c2UL, 0x52568b75UL,
0x6a1936c8UL, 0x6ed82b7fUL, 0x639b0da6UL, 0x675a1011UL,
0x791d4014UL, 0x7ddc5da3UL, 0x709f7b7aUL, 0x745e66cdUL,
0x9823b6e0UL, 0x9ce2ab57UL, 0x91a18d8eUL, 0x95609039UL,
0x8b27c03cUL, 0x8fe6dd8bUL, 0x82a5fb52UL, 0x8664e6e5UL,
0xbe2b5b58UL, 0xbaea46efUL, 0xb7a96036UL, 0xb3687d81UL,
0xad2f2d84UL, 0xa9ee3033UL, 0xa4ad16eaUL, 0xa06c0b5dUL,
0xd4326d90UL, 0xd0f37027UL, 0xddb056feUL, 0xd9714b49UL,
0xc7361b4cUL, 0xc3f706fbUL, 0xceb42022UL, 0xca753d95UL,
0xf23a8028UL, 0xf6fb9d9fUL, 0xfbb8bb46UL, 0xff79a6f1UL,
0xe13ef6f4UL, 0xe5ffeb43UL, 0xe8bccd9aUL, 0xec7dd02dUL,
0x34867077UL, 0x30476dc0UL, 0x3d044b19UL, 0x39c556aeUL,
0x278206abUL, 0x23431b1cUL, 0x2e003dc5UL, 0x2ac12072UL,
0x128e9dcfUL, 0x164f8078UL, 0x1b0ca6a1UL, 0x1fcdbb16UL,
0x018aeb13UL, 0x054bf6a4UL, 0x0808d07dUL, 0x0cc9cdcaUL,
0x7897ab07UL, 0x7c56b6b0UL, 0x71159069UL, 0x75d48ddeUL,
0x6b93dddbUL, 0x6f52c06cUL, 0x6211e6b5UL, 0x66d0fb02UL,
0x5e9f46bfUL, 0x5a5e5b08UL, 0x571d7dd1UL, 0x53dc6066UL,
0x4d9b3063UL, 0x495a2dd4UL, 0x44190b0dUL, 0x40d816baUL,
0xaca5c697UL, 0xa864db20UL, 0xa527fdf9UL, 0xa1e6e04eUL,
0xbfa1b04bUL, 0xbb60adfcUL, 0xb6238b25UL, 0xb2e29692UL,
0x8aad2b2fUL, 0x8e6c3698UL, 0x832f1041UL, 0x87ee0df6UL,
0x99a95df3UL, 0x9d684044UL, 0x902b669dUL, 0x94ea7b2aUL,
0xe0b41de7UL, 0xe4750050UL, 0xe9362689UL, 0xedf73b3eUL,
0xf3b06b3bUL, 0xf771768cUL, 0xfa325055UL, 0xfef34de2UL,
0xc6bcf05fUL, 0xc27dede8UL, 0xcf3ecb31UL, 0xcbffd686UL,
0xd5b88683UL, 0xd1799b34UL, 0xdc3abdedUL, 0xd8fba05aUL,
0x690ce0eeUL, 0x6dcdfd59UL, 0x608edb80UL, 0x644fc637UL,
0x7a089632UL, 0x7ec98b85UL, 0x738aad5cUL, 0x774bb0ebUL,
0x4f040d56UL, 0x4bc510e1UL, 0x46863638UL, 0x42472b8fUL,
0x5c007b8aUL, 0x58c1663dUL, 0x558240e4UL, 0x51435d53UL,
0x251d3b9eUL, 0x21dc2629UL, 0x2c9f00f0UL, 0x285e1d47UL,
0x36194d42UL, 0x32d850f5UL, 0x3f9b762cUL, 0x3b5a6b9bUL,
0x0315d626UL, 0x07d4cb91UL, 0x0a97ed48UL, 0x0e56f0ffUL,
0x1011a0faUL, 0x14d0bd4dUL, 0x19939b94UL, 0x1d528623UL,
0xf12f560eUL, 0xf5ee4bb9UL, 0xf8ad6d60UL, 0xfc6c70d7UL,
0xe22b20d2UL, 0xe6ea3d65UL, 0xeba91bbcUL, 0xef68060bUL,
0xd727bbb6UL, 0xd3e6a601UL, 0xdea580d8UL, 0xda649d6fUL,
0xc423cd6aUL, 0xc0e2d0ddUL, 0xcda1f604UL, 0xc960ebb3UL,
0xbd3e8d7eUL, 0xb9ff90c9UL, 0xb4bcb610UL, 0xb07daba7UL,
0xae3afba2UL, 0xaafbe615UL, 0xa7b8c0ccUL, 0xa379dd7bUL,
0x9b3660c6UL, 0x9ff77d71UL, 0x92b45ba8UL, 0x9675461fUL,
0x8832161aUL, 0x8cf30badUL, 0x81b02d74UL, 0x857130c3UL,
0x5d8a9099UL, 0x594b8d2eUL, 0x5408abf7UL, 0x50c9b640UL,
0x4e8ee645UL, 0x4a4ffbf2UL, 0x470cdd2bUL, 0x43cdc09cUL,
0x7b827d21UL, 0x7f436096UL, 0x7200464fUL, 0x76c15bf8UL,
0x68860bfdUL, 0x6c47164aUL, 0x61043093UL, 0x65c52d24UL,
0x119b4be9UL, 0x155a565eUL, 0x18197087UL, 0x1cd86d30UL,
0x029f3d35UL, 0x065e2082UL, 0x0b1d065bUL, 0x0fdc1becUL,
0x3793a651UL, 0x3352bbe6UL, 0x3e119d3fUL, 0x3ad08088UL,
0x2497d08dUL, 0x2056cd3aUL, 0x2d15ebe3UL, 0x29d4f654UL,
0xc5a92679UL, 0xc1683bceUL, 0xcc2b1d17UL, 0xc8ea00a0UL,
0xd6ad50a5UL, 0xd26c4d12UL, 0xdf2f6bcbUL, 0xdbee767cUL,
0xe3a1cbc1UL, 0xe760d676UL, 0xea23f0afUL, 0xeee2ed18UL,
0xf0a5bd1dUL, 0xf464a0aaUL, 0xf9278673UL, 0xfde69bc4UL,
0x89b8fd09UL, 0x8d79e0beUL, 0x803ac667UL, 0x84fbdbd0UL,
0x9abc8bd5UL, 0x9e7d9662UL, 0x933eb0bbUL, 0x97ffad0cUL,
0xafb010b1UL, 0xab710d06UL, 0xa6322bdfUL, 0xa2f33668UL,
0xbcb4666dUL, 0xb8757bdaUL, 0xb5365d03UL, 0xb1f740b4UL
};
/*---------------------------------------------*/
void initialiseCRC ( void )
{
globalCrc = 0xffffffffUL;
}
/*---------------------------------------------*/
UInt32 getFinalCRC ( void )
{
return ~globalCrc;
}
/*---------------------------------------------*/
UInt32 getGlobalCRC ( void )
{
return globalCrc;
}
/*---------------------------------------------*/
void setGlobalCRC ( UInt32 newCrc )
{
globalCrc = newCrc;
}
/*---------------------------------------------*/
#define UPDATE_CRC(crcVar,cha) \
{ \
crcVar = (crcVar << 8) ^ \
crc32Table[(crcVar >> 24) ^ \
((UChar)cha)]; \
}
/*---------------------------------------------------*/
/*--- Bit stream I/O ---*/
/*---------------------------------------------------*/
UInt32 bsBuff;
Int32 bsLive;
FILE* bsStream;
Bool bsWriting;
/*---------------------------------------------*/
void bsSetStream ( FILE* f, Bool wr )
{
if (bsStream != NULL) panic ( "bsSetStream" );
bsStream = f;
bsLive = 0;
bsBuff = 0;
bytesOut = 0;
bytesIn = 0;
bsWriting = wr;
}
/*---------------------------------------------*/
void bsFinishedWithStream ( void )
{
if (bsWriting)
while (bsLive > 0) {
fputc ( (UChar)(bsBuff >> 24), bsStream );
bsBuff <<= 8;
bsLive -= 8;
bytesOut++;
}
bsStream = NULL;
}
/*---------------------------------------------*/
#define bsNEEDR(nz) \
{ \
while (bsLive < nz) { \
Int32 zzi = fgetc ( bsStream ); \
if (zzi == EOF) compressedStreamEOF(); \
bsBuff = (bsBuff << 8) | (zzi & 0xffL); \
bsLive += 8; \
} \
}
/*---------------------------------------------*/
#define bsNEEDW(nz) \
{ \
while (bsLive >= 8) { \
fputc ( (UChar)(bsBuff >> 24), \
bsStream ); \
bsBuff <<= 8; \
bsLive -= 8; \
bytesOut++; \
} \
}
/*---------------------------------------------*/
#define bsR1(vz) \
{ \
bsNEEDR(1); \
vz = (bsBuff >> (bsLive-1)) & 1; \
bsLive--; \
}
/*---------------------------------------------*/
INLINE UInt32 bsR ( Int32 n )
{
UInt32 v;
bsNEEDR ( n );
v = (bsBuff >> (bsLive-n)) & ((1 << n)-1);
bsLive -= n;
return v;
}
/*---------------------------------------------*/
INLINE void bsW ( Int32 n, UInt32 v )
{
bsNEEDW ( n );
bsBuff |= (v << (32 - bsLive - n));
bsLive += n;
}
/*---------------------------------------------*/
UChar bsGetUChar ( void )
{
return (UChar)bsR(8);
}
/*---------------------------------------------*/
void bsPutUChar ( UChar c )
{
bsW(8, (UInt32)c );
}
/*---------------------------------------------*/
Int32 bsGetUInt32 ( void )
{
UInt32 u;
u = 0;
u = (u << 8) | bsR(8);
u = (u << 8) | bsR(8);
u = (u << 8) | bsR(8);
u = (u << 8) | bsR(8);
return u;
}
/*---------------------------------------------*/
UInt32 bsGetIntVS ( UInt32 numBits )
{
return (UInt32)bsR(numBits);
}
/*---------------------------------------------*/
UInt32 bsGetInt32 ( void )
{
return (Int32)bsGetUInt32();
}
/*---------------------------------------------*/
void bsPutUInt32 ( UInt32 u )
{
bsW ( 8, (u >> 24) & 0xffL );
bsW ( 8, (u >> 16) & 0xffL );
bsW ( 8, (u >> 8) & 0xffL );
bsW ( 8, u & 0xffL );
}
/*---------------------------------------------*/
void bsPutInt32 ( Int32 c )
{
bsPutUInt32 ( (UInt32)c );
}
/*---------------------------------------------*/
void bsPutIntVS ( Int32 numBits, UInt32 c )
{
bsW ( numBits, c );
}
/*---------------------------------------------------*/
/*--- Huffman coding low-level stuff ---*/
/*---------------------------------------------------*/
#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
#define ADDWEIGHTS(zw1,zw2) \
(WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
(1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
#define UPHEAP(z) \
{ \
Int32 zz, tmp; \
zz = z; tmp = heap[zz]; \
while (weight[tmp] < weight[heap[zz >> 1]]) { \
heap[zz] = heap[zz >> 1]; \
zz >>= 1; \
} \
heap[zz] = tmp; \
}
#define DOWNHEAP(z) \
{ \
Int32 zz, yy, tmp; \
zz = z; tmp = heap[zz]; \
while (True) { \
yy = zz << 1; \
if (yy > nHeap) break; \
if (yy < nHeap && \
weight[heap[yy+1]] < weight[heap[yy]]) \
yy++; \
if (weight[tmp] < weight[heap[yy]]) break; \
heap[zz] = heap[yy]; \
zz = yy; \
} \
heap[zz] = tmp; \
}
/*---------------------------------------------*/
void hbMakeCodeLengths ( UChar *len,
Int32 *freq,
Int32 alphaSize,
Int32 maxLen )
{
/*--
Nodes and heap entries run from 1. Entry 0
for both the heap and nodes is a sentinel.
--*/
Int32 nNodes, nHeap, n1, n2, i, j, k;
Bool tooLong;
Int32 heap [ MAX_ALPHA_SIZE + 2 ];
Int32 weight [ MAX_ALPHA_SIZE * 2 ];
Int32 parent [ MAX_ALPHA_SIZE * 2 ];
for (i = 0; i < alphaSize; i++)
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
while (True) {
nNodes = alphaSize;
nHeap = 0;
heap[0] = 0;
weight[0] = 0;
parent[0] = -2;
for (i = 1; i <= alphaSize; i++) {
parent[i] = -1;
nHeap++;
heap[nHeap] = i;
UPHEAP(nHeap);
}
if (!(nHeap < (MAX_ALPHA_SIZE+2)))
panic ( "hbMakeCodeLengths(1)" );
while (nHeap > 1) {
n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
nNodes++;
parent[n1] = parent[n2] = nNodes;
weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
parent[nNodes] = -1;
nHeap++;
heap[nHeap] = nNodes;
UPHEAP(nHeap);
}
if (!(nNodes < (MAX_ALPHA_SIZE * 2)))
panic ( "hbMakeCodeLengths(2)" );
tooLong = False;
for (i = 1; i <= alphaSize; i++) {
j = 0;
k = i;
while (parent[k] >= 0) { k = parent[k]; j++; }
len[i-1] = j;
if (j > maxLen) tooLong = True;
}
if (! tooLong) break;
for (i = 1; i < alphaSize; i++) {
j = weight[i] >> 8;
j = 1 + (j / 2);
weight[i] = j << 8;
}
}
}
/*---------------------------------------------*/
void hbAssignCodes ( Int32 *code,
UChar *length,
Int32 minLen,
Int32 maxLen,
Int32 alphaSize )
{
Int32 n, vec, i;
vec = 0;
for (n = minLen; n <= maxLen; n++) {
for (i = 0; i < alphaSize; i++)
if (length[i] == n) { code[i] = vec; vec++; };
vec <<= 1;
}
}
/*---------------------------------------------*/
void hbCreateDecodeTables ( Int32 *limit,
Int32 *base,
Int32 *perm,
UChar *length,
Int32 minLen,
Int32 maxLen,
Int32 alphaSize )
{
Int32 pp, i, j, vec;
pp = 0;
for (i = minLen; i <= maxLen; i++)
for (j = 0; j < alphaSize; j++)
if (length[j] == i) { perm[pp] = j; pp++; };
for (i = 0; i < MAX_CODE_LEN; i++) base[i] = 0;
for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
for (i = 1; i < MAX_CODE_LEN; i++) base[i] += base[i-1];
for (i = 0; i < MAX_CODE_LEN; i++) limit[i] = 0;
vec = 0;
for (i = minLen; i <= maxLen; i++) {
vec += (base[i+1] - base[i]);
limit[i] = vec-1;
vec <<= 1;
}
for (i = minLen + 1; i <= maxLen; i++)
base[i] = ((limit[i-1] + 1) << 1) - base[i];
}
/*---------------------------------------------------*/
/*--- Undoing the reversible transformation ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
#define SET_LL4(i,n) \
{ if (((i) & 0x1) == 0) \
ll4[(i) >> 1] = (ll4[(i) >> 1] & 0xf0) | (n); else \
ll4[(i) >> 1] = (ll4[(i) >> 1] & 0x0f) | ((n) << 4); \
}
#define GET_LL4(i) \
(((UInt32)(ll4[(i) >> 1])) >> (((i) << 2) & 0x4) & 0xF)
#define SET_LL(i,n) \
{ ll16[i] = (UInt16)(n & 0x0000ffff); \
SET_LL4(i, n >> 16); \
}
#define GET_LL(i) \
(((UInt32)ll16[i]) | (GET_LL4(i) << 16))
/*---------------------------------------------*/
/*--
Manage memory for compression/decompression.
When compressing, a single block size applies to
all files processed, and that's set when the
program starts. But when decompressing, each file
processed could have been compressed with a
different block size, so we may have to free
and reallocate on a per-file basis.
A call with argument of zero means
`free up everything.' And a value of zero for
blockSize100k means no memory is currently allocated.
--*/
/*---------------------------------------------*/
void allocateCompressStructures ( void )
{
Int32 n = 100000 * blockSize100k;
block = malloc ( (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) );
quadrant = malloc ( (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) );
zptr = malloc ( n * sizeof(Int32) );
ftab = malloc ( 65537 * sizeof(Int32) );
if (block == NULL || quadrant == NULL ||
zptr == NULL || ftab == NULL) {
Int32 totalDraw
= (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) +
(n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) +
n * sizeof(Int32) +
65537 * sizeof(Int32);
compressOutOfMemory ( totalDraw, n );
}
/*--
Since we want valid indexes for block of
-1 to n + NUM_OVERSHOOT_BYTES - 1
inclusive.
--*/
block++;
/*--
The back end needs a place to store the MTF values
whilst it calculates the coding tables. We could
put them in the zptr array. However, these values
will fit in a short, so we overlay szptr at the
start of zptr, in the hope of reducing the number
of cache misses induced by the multiple traversals
of the MTF values when calculating coding tables.
Seems to improve compression speed by about 1%.
--*/
szptr = (UInt16*)zptr;
}
/*---------------------------------------------*/
void setDecompressStructureSizes ( Int32 newSize100k )
{
if (! (0 <= newSize100k && newSize100k <= 9 &&
0 <= blockSize100k && blockSize100k <= 9))
panic ( "setDecompressStructureSizes" );
if (newSize100k == blockSize100k) return;
blockSize100k = newSize100k;
if (ll16 != NULL) free ( ll16 );
if (ll4 != NULL) free ( ll4 );
if (ll8 != NULL) free ( ll8 );
if (tt != NULL) free ( tt );
if (newSize100k == 0) return;
if (smallMode) {
Int32 n = 100000 * newSize100k;
ll16 = malloc ( n * sizeof(UInt16) );
ll4 = malloc ( ((n+1) >> 1) * sizeof(UChar) );
if (ll4 == NULL || ll16 == NULL) {
Int32 totalDraw
= n * sizeof(Int16) + ((n+1) >> 1) * sizeof(UChar);
uncompressOutOfMemory ( totalDraw, n );
}
} else {
Int32 n = 100000 * newSize100k;
ll8 = malloc ( n * sizeof(UChar) );
tt = malloc ( n * sizeof(Int32) );
if (ll8 == NULL || tt == NULL) {
Int32 totalDraw
= n * sizeof(UChar) + n * sizeof(UInt32);
uncompressOutOfMemory ( totalDraw, n );
}
}
}
/*---------------------------------------------------*/
/*--- The new back end ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
void makeMaps ( void )
{
Int32 i;
nInUse = 0;
for (i = 0; i < 256; i++)
if (inUse[i]) {
seqToUnseq[nInUse] = i;
unseqToSeq[i] = nInUse;
nInUse++;
}
}
/*---------------------------------------------*/
void generateMTFValues ( void )
{
UChar yy[256];
Int32 i, j;
UChar tmp;
UChar tmp2;
Int32 zPend;
Int32 wr;
Int32 EOB;
makeMaps();
EOB = nInUse+1;
for (i = 0; i <= EOB; i++) mtfFreq[i] = 0;
wr = 0;
zPend = 0;
for (i = 0; i < nInUse; i++) yy[i] = (UChar) i;
for (i = 0; i <= last; i++) {
UChar ll_i;
#if DEBUG
assert (wr <= i);
#endif
ll_i = unseqToSeq[block[zptr[i] - 1]];
#if DEBUG
assert (ll_i < nInUse);
#endif
j = 0;
tmp = yy[j];
while ( ll_i != tmp ) {
j++;
tmp2 = tmp;
tmp = yy[j];
yy[j] = tmp2;
};
yy[0] = tmp;
if (j == 0) {
zPend++;
} else {
if (zPend > 0) {
zPend--;
while (True) {
switch (zPend % 2) {
case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
};
if (zPend < 2) break;
zPend = (zPend - 2) / 2;
};
zPend = 0;
}
szptr[wr] = j+1; wr++; mtfFreq[j+1]++;
}
}
if (zPend > 0) {
zPend--;
while (True) {
switch (zPend % 2) {
case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
};
if (zPend < 2) break;
zPend = (zPend - 2) / 2;
};
}
szptr[wr] = EOB; wr++; mtfFreq[EOB]++;
nMTF = wr;
}
/*---------------------------------------------*/
#define LESSER_ICOST 0
#define GREATER_ICOST 15
void sendMTFValues ( void )
{
Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
Int32 nGroups, nBytes;
/*--
UChar len [N_GROUPS][MAX_ALPHA_SIZE];
is a global since the decoder also needs it.
Int32 code[N_GROUPS][MAX_ALPHA_SIZE];
Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
are also globals only used in this proc.
Made global to keep stack frame size small.
--*/
UInt16 cost[N_GROUPS];
Int32 fave[N_GROUPS];
if (verbosity >= 3)
fprintf ( stderr,
" %d in block, %d after MTF & 1-2 coding, %d+2 syms in use\n",
last+1, nMTF, nInUse );
alphaSize = nInUse+2;
for (t = 0; t < N_GROUPS; t++)
for (v = 0; v < alphaSize; v++)
len[t][v] = GREATER_ICOST;
/*--- Decide how many coding tables to use ---*/
if (nMTF <= 0) panic ( "sendMTFValues(0)" );
if (nMTF < 200) nGroups = 2; else
if (nMTF < 800) nGroups = 4; else
nGroups = 6;
/*--- Generate an initial set of coding tables ---*/
{
Int32 nPart, remF, tFreq, aFreq;
nPart = nGroups;
remF = nMTF;
gs = 0;
while (nPart > 0) {
tFreq = remF / nPart;
ge = gs-1;
aFreq = 0;
while (aFreq < tFreq && ge < alphaSize-1) {
ge++;
aFreq += mtfFreq[ge];
}
if (ge > gs
&& nPart != nGroups && nPart != 1
&& ((nGroups-nPart) % 2 == 1)) {
aFreq -= mtfFreq[ge];
ge--;
}
if (verbosity >= 3)
fprintf ( stderr,
" initial group %d, [%d .. %d], has %d syms (%4.1f%%)\n",
nPart, gs, ge, aFreq,
(100.0 * (float)aFreq) / (float)nMTF );
for (v = 0; v < alphaSize; v++)
if (v >= gs && v <= ge)
len[nPart-1][v] = LESSER_ICOST; else
len[nPart-1][v] = GREATER_ICOST;
nPart--;
gs = ge+1;
remF -= aFreq;
}
}
/*---
Iterate up to N_ITERS times to improve the tables.
---*/
for (iter = 0; iter < N_ITERS; iter++) {
for (t = 0; t < nGroups; t++) fave[t] = 0;
for (t = 0; t < nGroups; t++)
for (v = 0; v < alphaSize; v++)
rfreq[t][v] = 0;
nSelectors = 0;
totc = 0;
gs = 0;
while (True) {
/*--- Set group start & end marks. --*/
if (gs >= nMTF) break;
ge = gs + G_SIZE - 1;
if (ge >= nMTF) ge = nMTF-1;
/*--
Calculate the cost of this group as coded
by each of the coding tables.
--*/
for (t = 0; t < nGroups; t++) cost[t] = 0;
if (nGroups == 6) {
register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
for (i = gs; i <= ge; i++) {
UInt16 icv = szptr[i];
cost0 += len[0][icv];
cost1 += len[1][icv];
cost2 += len[2][icv];
cost3 += len[3][icv];
cost4 += len[4][icv];
cost5 += len[5][icv];
}
cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
} else {
for (i = gs; i <= ge; i++) {
UInt16 icv = szptr[i];
for (t = 0; t < nGroups; t++) cost[t] += len[t][icv];
}
}
/*--
Find the coding table which is best for this group,
and record its identity in the selector table.
--*/
bc = 999999999; bt = -1;
for (t = 0; t < nGroups; t++)
if (cost[t] < bc) { bc = cost[t]; bt = t; };
totc += bc;
fave[bt]++;
selector[nSelectors] = bt;
nSelectors++;
/*--
Increment the symbol frequencies for the selected table.
--*/
for (i = gs; i <= ge; i++)
rfreq[bt][ szptr[i] ]++;
gs = ge+1;
}
if (verbosity >= 3) {
fprintf ( stderr,
" pass %d: size is %d, grp uses are ",
iter+1, totc/8 );
for (t = 0; t < nGroups; t++)
fprintf ( stderr, "%d ", fave[t] );
fprintf ( stderr, "\n" );
}
/*--
Recompute the tables based on the accumulated frequencies.
--*/
for (t = 0; t < nGroups; t++)
hbMakeCodeLengths ( &len[t][0], &rfreq[t][0], alphaSize, 20 );
}
if (!(nGroups < 8)) panic ( "sendMTFValues(1)" );
if (!(nSelectors < 32768 &&
nSelectors <= (2 + (900000 / G_SIZE))))
panic ( "sendMTFValues(2)" );
/*--- Compute MTF values for the selectors. ---*/
{
UChar pos[N_GROUPS], ll_i, tmp2, tmp;
for (i = 0; i < nGroups; i++) pos[i] = i;
for (i = 0; i < nSelectors; i++) {
ll_i = selector[i];
j = 0;
tmp = pos[j];
while ( ll_i != tmp ) {
j++;
tmp2 = tmp;
tmp = pos[j];
pos[j] = tmp2;
};
pos[0] = tmp;
selectorMtf[i] = j;
}
};
/*--- Assign actual codes for the tables. --*/
for (t = 0; t < nGroups; t++) {
minLen = 32;
maxLen = 0;
for (i = 0; i < alphaSize; i++) {
if (len[t][i] > maxLen) maxLen = len[t][i];
if (len[t][i] < minLen) minLen = len[t][i];
}
if (maxLen > 20) panic ( "sendMTFValues(3)" );
if (minLen < 1) panic ( "sendMTFValues(4)" );
hbAssignCodes ( &code[t][0], &len[t][0],
minLen, maxLen, alphaSize );
}
/*--- Transmit the mapping table. ---*/
{
Bool inUse16[16];
for (i = 0; i < 16; i++) {
inUse16[i] = False;
for (j = 0; j < 16; j++)
if (inUse[i * 16 + j]) inUse16[i] = True;
}
nBytes = bytesOut;
for (i = 0; i < 16; i++)
if (inUse16[i]) bsW(1,1); else bsW(1,0);
for (i = 0; i < 16; i++)
if (inUse16[i])
for (j = 0; j < 16; j++)
if (inUse[i * 16 + j]) bsW(1,1); else bsW(1,0);
if (verbosity >= 3)
fprintf ( stderr, " bytes: mapping %d, ", bytesOut-nBytes );
}
/*--- Now the selectors. ---*/
nBytes = bytesOut;
bsW ( 3, nGroups );
bsW ( 15, nSelectors );
for (i = 0; i < nSelectors; i++) {
for (j = 0; j < selectorMtf[i]; j++) bsW(1,1);
bsW(1,0);
}
if (verbosity >= 3)
fprintf ( stderr, "selectors %d, ", bytesOut-nBytes );
/*--- Now the coding tables. ---*/
nBytes = bytesOut;
for (t = 0; t < nGroups; t++) {
Int32 curr = len[t][0];
bsW ( 5, curr );
for (i = 0; i < alphaSize; i++) {
while (curr < len[t][i]) { bsW(2,2); curr++; /* 10 */ };
while (curr > len[t][i]) { bsW(2,3); curr--; /* 11 */ };
bsW ( 1, 0 );
}
}
if (verbosity >= 3)
fprintf ( stderr, "code lengths %d, ", bytesOut-nBytes );
/*--- And finally, the block data proper ---*/
nBytes = bytesOut;
selCtr = 0;
gs = 0;
while (True) {
if (gs >= nMTF) break;
ge = gs + G_SIZE - 1;
if (ge >= nMTF) ge = nMTF-1;
for (i = gs; i <= ge; i++) {
#if DEBUG
assert (selector[selCtr] < nGroups);
#endif
bsW ( len [selector[selCtr]] [szptr[i]],
code [selector[selCtr]] [szptr[i]] );
}
gs = ge+1;
selCtr++;
}
if (!(selCtr == nSelectors)) panic ( "sendMTFValues(5)" );
if (verbosity >= 3)
fprintf ( stderr, "codes %d\n", bytesOut-nBytes );
}
/*---------------------------------------------*/
void moveToFrontCodeAndSend ( void )
{
bsPutIntVS ( 24, origPtr );
generateMTFValues();
sendMTFValues();
}
/*---------------------------------------------*/
void recvDecodingTables ( void )
{
Int32 i, j, t, nGroups, nSelectors, alphaSize;
Int32 minLen, maxLen;
Bool inUse16[16];
/*--- Receive the mapping table ---*/
for (i = 0; i < 16; i++)
if (bsR(1) == 1)
inUse16[i] = True; else
inUse16[i] = False;
for (i = 0; i < 256; i++) inUse[i] = False;
for (i = 0; i < 16; i++)
if (inUse16[i])
for (j = 0; j < 16; j++)
if (bsR(1) == 1) inUse[i * 16 + j] = True;
makeMaps();
alphaSize = nInUse+2;
/*--- Now the selectors ---*/
nGroups = bsR ( 3 );
nSelectors = bsR ( 15 );
for (i = 0; i < nSelectors; i++) {
j = 0;
while (bsR(1) == 1) j++;
selectorMtf[i] = j;
}
/*--- Undo the MTF values for the selectors. ---*/
{
UChar pos[N_GROUPS], tmp, v;
for (v = 0; v < nGroups; v++) pos[v] = v;
for (i = 0; i < nSelectors; i++) {
v = selectorMtf[i];
tmp = pos[v];
while (v > 0) { pos[v] = pos[v-1]; v--; }
pos[0] = tmp;
selector[i] = tmp;
}
}
/*--- Now the coding tables ---*/
for (t = 0; t < nGroups; t++) {
Int32 curr = bsR ( 5 );
for (i = 0; i < alphaSize; i++) {
while (bsR(1) == 1) {
if (bsR(1) == 0) curr++; else curr--;
}
len[t][i] = curr;
}
}
/*--- Create the Huffman decoding tables ---*/
for (t = 0; t < nGroups; t++) {
minLen = 32;
maxLen = 0;
for (i = 0; i < alphaSize; i++) {
if (len[t][i] > maxLen) maxLen = len[t][i];
if (len[t][i] < minLen) minLen = len[t][i];
}
hbCreateDecodeTables (
&limit[t][0], &base[t][0], &perm[t][0], &len[t][0],
minLen, maxLen, alphaSize
);
minLens[t] = minLen;
}
}
/*---------------------------------------------*/
#define GET_MTF_VAL(lval) \
{ \
Int32 zt, zn, zvec, zj; \
if (groupPos == 0) { \
groupNo++; \
groupPos = G_SIZE; \
} \
groupPos--; \
zt = selector[groupNo]; \
zn = minLens[zt]; \
zvec = bsR ( zn ); \
while (zvec > limit[zt][zn]) { \
zn++; bsR1(zj); \
zvec = (zvec << 1) | zj; \
}; \
lval = perm[zt][zvec - base[zt][zn]]; \
}
/*---------------------------------------------*/
void getAndMoveToFrontDecode ( void )
{
UChar yy[256];
Int32 i, j, nextSym, limitLast;
Int32 EOB, groupNo, groupPos;
limitLast = 100000 * blockSize100k;
origPtr = bsGetIntVS ( 24 );
recvDecodingTables();
EOB = nInUse+1;
groupNo = -1;
groupPos = 0;
/*--
Setting up the unzftab entries here is not strictly
necessary, but it does save having to do it later
in a separate pass, and so saves a block's worth of
cache misses.
--*/
for (i = 0; i <= 255; i++) unzftab[i] = 0;
for (i = 0; i <= 255; i++) yy[i] = (UChar) i;
last = -1;
GET_MTF_VAL(nextSym);
while (True) {
if (nextSym == EOB) break;
if (nextSym == RUNA || nextSym == RUNB) {
UChar ch;
Int32 s = -1;
Int32 N = 1;
do {
if (nextSym == RUNA) s = s + (0+1) * N; else
if (nextSym == RUNB) s = s + (1+1) * N;
N = N * 2;
GET_MTF_VAL(nextSym);
}
while (nextSym == RUNA || nextSym == RUNB);
s++;
ch = seqToUnseq[yy[0]];
unzftab[ch] += s;
if (smallMode)
while (s > 0) {
last++;
ll16[last] = ch;
s--;
}
else
while (s > 0) {
last++;
ll8[last] = ch;
s--;
};
if (last >= limitLast) blockOverrun();
continue;
} else {
UChar tmp;
last++; if (last >= limitLast) blockOverrun();
tmp = yy[nextSym-1];
unzftab[seqToUnseq[tmp]]++;
if (smallMode)
ll16[last] = seqToUnseq[tmp]; else
ll8[last] = seqToUnseq[tmp];
/*--
This loop is hammered during decompression,
hence the unrolling.
for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
--*/
j = nextSym-1;
for (; j > 3; j -= 4) {
yy[j] = yy[j-1];
yy[j-1] = yy[j-2];
yy[j-2] = yy[j-3];
yy[j-3] = yy[j-4];
}
for (; j > 0; j--) yy[j] = yy[j-1];
yy[0] = tmp;
GET_MTF_VAL(nextSym);
continue;
}
}
}
/*---------------------------------------------------*/
/*--- Block-sorting machinery ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
/*--
Compare two strings in block. We assume (see
discussion above) that i1 and i2 have a max
offset of 10 on entry, and that the first
bytes of both block and quadrant have been
copied into the "overshoot area", ie
into the subscript range
[last+1 .. last+NUM_OVERSHOOT_BYTES].
--*/
INLINE Bool fullGtU ( Int32 i1, Int32 i2 )
{
Int32 k;
UChar c1, c2;
UInt16 s1, s2;
#if DEBUG
/*--
shellsort shouldn't ask to compare
something with itself.
--*/
assert (i1 != i2);
#endif
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
i1++; i2++;
k = last + 1;
do {
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1];
s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1];
s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1];
s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
c1 = block[i1];
c2 = block[i2];
if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1];
s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
i1++; i2++;
if (i1 > last) { i1 -= last; i1--; };
if (i2 > last) { i2 -= last; i2--; };
k -= 4;
workDone++;
}
while (k >= 0);
return False;
}
/*---------------------------------------------*/
/*--
Knuth's increments seem to work better
than Incerpi-Sedgewick here. Possibly
because the number of elems to sort is
usually small, typically <= 20.
--*/
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
9841, 29524, 88573, 265720,
797161, 2391484 };
void simpleSort ( Int32 lo, Int32 hi, Int32 d )
{
Int32 i, j, h, bigN, hp;
Int32 v;
bigN = hi - lo + 1;
if (bigN < 2) return;
hp = 0;
while (incs[hp] < bigN) hp++;
hp--;
for (; hp >= 0; hp--) {
h = incs[hp];
if (verbosity >= 5)
fprintf ( stderr, " shell increment %d\n", h );
i = lo + h;
while (True) {
/*-- copy 1 --*/
if (i > hi) break;
v = zptr[i];
j = i;
while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
zptr[j] = zptr[j-h];
j = j - h;
if (j <= (lo + h - 1)) break;
}
zptr[j] = v;
i++;
/*-- copy 2 --*/
if (i > hi) break;
v = zptr[i];
j = i;
while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
zptr[j] = zptr[j-h];
j = j - h;
if (j <= (lo + h - 1)) break;
}
zptr[j] = v;
i++;
/*-- copy 3 --*/
if (i > hi) break;
v = zptr[i];
j = i;
while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
zptr[j] = zptr[j-h];
j = j - h;
if (j <= (lo + h - 1)) break;
}
zptr[j] = v;
i++;
if (workDone > workLimit && firstAttempt) return;
}
}
}
/*---------------------------------------------*/
/*--
The following is an implementation of
an elegant 3-way quicksort for strings,
described in a paper "Fast Algorithms for
Sorting and Searching Strings", by Robert
Sedgewick and Jon L. Bentley.
--*/
#define swap(lv1, lv2) \
{ Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; }
INLINE void vswap ( Int32 p1, Int32 p2, Int32 n )
{
while (n > 0) {
swap(zptr[p1], zptr[p2]);
p1++; p2++; n--;
}
}
INLINE UChar med3 ( UChar a, UChar b, UChar c )
{
UChar t;
if (a > b) { t = a; a = b; b = t; };
if (b > c) { t = b; b = c; c = t; };
if (a > b) b = a;
return b;
}
#define min(a,b) ((a) < (b)) ? (a) : (b)
typedef
struct { Int32 ll; Int32 hh; Int32 dd; }
StackElem;
#define push(lz,hz,dz) { stack[sp].ll = lz; \
stack[sp].hh = hz; \
stack[sp].dd = dz; \
sp++; }
#define pop(lz,hz,dz) { sp--; \
lz = stack[sp].ll; \
hz = stack[sp].hh; \
dz = stack[sp].dd; }
#define SMALL_THRESH 20
#define DEPTH_THRESH 10
/*--
If you are ever unlucky/improbable enough
to get a stack overflow whilst sorting,
increase the following constant and try
again. In practice I have never seen the
stack go above 27 elems, so the following
limit seems very generous.
--*/
#define QSORT_STACK_SIZE 1000
void qSort3 ( Int32 loSt, Int32 hiSt, Int32 dSt )
{
Int32 unLo, unHi, ltLo, gtHi, med, n, m;
Int32 sp, lo, hi, d;
StackElem stack[QSORT_STACK_SIZE];
sp = 0;
push ( loSt, hiSt, dSt );
while (sp > 0) {
if (sp >= QSORT_STACK_SIZE) panic ( "stack overflow in qSort3" );
pop ( lo, hi, d );
if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
simpleSort ( lo, hi, d );
if (workDone > workLimit && firstAttempt) return;
continue;
}
med = med3 ( block[zptr[ lo ]+d],
block[zptr[ hi ]+d],
block[zptr[ (lo+hi)>>1 ]+d] );
unLo = ltLo = lo;
unHi = gtHi = hi;
while (True) {
while (True) {
if (unLo > unHi) break;
n = ((Int32)block[zptr[unLo]+d]) - med;
if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; };
if (n > 0) break;
unLo++;
}
while (True) {
if (unLo > unHi) break;
n = ((Int32)block[zptr[unHi]+d]) - med;
if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; };
if (n < 0) break;
unHi--;
}
if (unLo > unHi) break;
swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
}
#if DEBUG
assert (unHi == unLo-1);
#endif
if (gtHi < ltLo) {
push(lo, hi, d+1 );
continue;
}
n = min(ltLo-lo, unLo-ltLo); vswap(lo, unLo-n, n);
m = min(hi-gtHi, gtHi-unHi); vswap(unLo, hi-m+1, m);
n = lo + unLo - ltLo - 1;
m = hi - (gtHi - unHi) + 1;
push ( lo, n, d );
push ( n+1, m-1, d+1 );
push ( m, hi, d );
}
}
/*---------------------------------------------*/
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
#define SETMASK (1 << 21)
#define CLEARMASK (~(SETMASK))
void sortIt ( void )
{
Int32 i, j, ss, sb;
Int32 runningOrder[256];
Int32 copy[256];
Bool bigDone[256];
UChar c1, c2;
Int32 numQSorted;
/*--
In the various block-sized structures, live data runs
from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
set up the overshoot area for block.
--*/
if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
for (i = 0; i < NUM_OVERSHOOT_BYTES; i++)
block[last+i+1] = block[i % (last+1)];
for (i = 0; i <= last+NUM_OVERSHOOT_BYTES; i++)
quadrant[i] = 0;
block[-1] = block[last];
if (last < 4000) {
/*--
Use simpleSort(), since the full sorting mechanism
has quite a large constant overhead.
--*/
if (verbosity >= 4) fprintf ( stderr, " simpleSort ...\n" );
for (i = 0; i <= last; i++) zptr[i] = i;
firstAttempt = False;
workDone = workLimit = 0;
simpleSort ( 0, last, 0 );
if (verbosity >= 4) fprintf ( stderr, " simpleSort done.\n" );
} else {
numQSorted = 0;
for (i = 0; i <= 255; i++) bigDone[i] = False;
if (verbosity >= 4) fprintf ( stderr, " bucket sorting ...\n" );
for (i = 0; i <= 65536; i++) ftab[i] = 0;
c1 = block[-1];
for (i = 0; i <= last; i++) {
c2 = block[i];
ftab[(c1 << 8) + c2]++;
c1 = c2;
}
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
c1 = block[0];
for (i = 0; i < last; i++) {
c2 = block[i+1];
j = (c1 << 8) + c2;
c1 = c2;
ftab[j]--;
zptr[ftab[j]] = i;
}
j = (block[last] << 8) + block[0];
ftab[j]--;
zptr[ftab[j]] = last;
/*--
Now ftab contains the first loc of every small bucket.
Calculate the running order, from smallest to largest
big bucket.
--*/
for (i = 0; i <= 255; i++) runningOrder[i] = i;
{
Int32 vv;
Int32 h = 1;
do h = 3 * h + 1; while (h <= 256);
do {
h = h / 3;
for (i = h; i <= 255; i++) {
vv = runningOrder[i];
j = i;
while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
runningOrder[j] = runningOrder[j-h];
j = j - h;
if (j <= (h - 1)) goto zero;
}
zero:
runningOrder[j] = vv;
}
} while (h != 1);
}
/*--
The main sorting loop.
--*/
for (i = 0; i <= 255; i++) {
/*--
Process big buckets, starting with the least full.
--*/
ss = runningOrder[i];
/*--
Complete the big bucket [ss] by quicksorting
any unsorted small buckets [ss, j]. Hopefully
previous pointer-scanning phases have already
completed many of the small buckets [ss, j], so
we don't have to sort them at all.
--*/
for (j = 0; j <= 255; j++) {
sb = (ss << 8) + j;
if ( ! (ftab[sb] & SETMASK) ) {
Int32 lo = ftab[sb] & CLEARMASK;
Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
if (hi > lo) {
if (verbosity >= 4)
fprintf ( stderr,
" qsort [0x%x, 0x%x] done %d this %d\n",
ss, j, numQSorted, hi - lo + 1 );
qSort3 ( lo, hi, 2 );
numQSorted += ( hi - lo + 1 );
if (workDone > workLimit && firstAttempt) return;
}
ftab[sb] |= SETMASK;
}
}
/*--
The ss big bucket is now done. Record this fact,
and update the quadrant descriptors. Remember to
update quadrants in the overshoot area too, if
necessary. The "if (i < 255)" test merely skips
this updating for the last bucket processed, since
updating for the last bucket is pointless.
--*/
bigDone[ss] = True;
if (i < 255) {
Int32 bbStart = ftab[ss << 8] & CLEARMASK;
Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
Int32 shifts = 0;
while ((bbSize >> shifts) > 65534) shifts++;
for (j = 0; j < bbSize; j++) {
Int32 a2update = zptr[bbStart + j];
UInt16 qVal = (UInt16)(j >> shifts);
quadrant[a2update] = qVal;
if (a2update < NUM_OVERSHOOT_BYTES)
quadrant[a2update + last + 1] = qVal;
}
if (! ( ((bbSize-1) >> shifts) <= 65535 )) panic ( "sortIt" );
}
/*--
Now scan this big bucket so as to synthesise the
sorted order for small buckets [t, ss] for all t != ss.
--*/
for (j = 0; j <= 255; j++)
copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
for (j = ftab[ss << 8] & CLEARMASK;
j < (ftab[(ss+1) << 8] & CLEARMASK);
j++) {
c1 = block[zptr[j]-1];
if ( ! bigDone[c1] ) {
zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
copy[c1] ++;
}
}
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
}
if (verbosity >= 4)
fprintf ( stderr, " %d pointers, %d sorted, %d scanned\n",
last+1, numQSorted, (last+1) - numQSorted );
}
}
/*---------------------------------------------------*/
/*--- Stuff for randomising repetitive blocks ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
Int32 rNums[512] = {
619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
936, 638
};
#define RAND_DECLS \
Int32 rNToGo = 0; \
Int32 rTPos = 0; \
#define RAND_MASK ((rNToGo == 1) ? 1 : 0)
#define RAND_UPD_MASK \
if (rNToGo == 0) { \
rNToGo = rNums[rTPos]; \
rTPos++; if (rTPos == 512) rTPos = 0; \
} \
rNToGo--;
/*---------------------------------------------------*/
/*--- The Reversible Transformation (tm) ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
void randomiseBlock ( void )
{
Int32 i;
RAND_DECLS;
for (i = 0; i < 256; i++) inUse[i] = False;
for (i = 0; i <= last; i++) {
RAND_UPD_MASK;
block[i] ^= RAND_MASK;
inUse[block[i]] = True;
}
}
/*---------------------------------------------*/
void doReversibleTransformation ( void )
{
Int32 i;
if (verbosity >= 2) fprintf ( stderr, "\n" );
workLimit = workFactor * last;
workDone = 0;
blockRandomised = False;
firstAttempt = True;
sortIt ();
if (verbosity >= 3)
fprintf ( stderr, " %d work, %d block, ratio %5.2f\n",
workDone, last, (float)workDone / (float)(last) );
if (workDone > workLimit && firstAttempt) {
if (verbosity >= 2)
fprintf ( stderr, " sorting aborted; randomising block\n" );
randomiseBlock ();
workLimit = workDone = 0;
blockRandomised = True;
firstAttempt = False;
sortIt();
if (verbosity >= 3)
fprintf ( stderr, " %d work, %d block, ratio %f\n",
workDone, last, (float)workDone / (float)(last) );
}
origPtr = -1;
for (i = 0; i <= last; i++)
if (zptr[i] == 0)
{ origPtr = i; break; };
if (origPtr == -1) panic ( "doReversibleTransformation" );
}
/*---------------------------------------------*/
INLINE Int32 indexIntoF ( Int32 indx, Int32 *cftab )
{
Int32 nb, na, mid;
nb = 0;
na = 256;
do {
mid = (nb + na) >> 1;
if (indx >= cftab[mid]) nb = mid; else na = mid;
}
while (na - nb != 1);
return nb;
}
#define GET_SMALL(cccc) \
\
cccc = indexIntoF ( tPos, cftab ); \
tPos = GET_LL(tPos);
void undoReversibleTransformation_small ( FILE* dst )
{
Int32 cftab[257], cftabAlso[257];
Int32 i, j, tmp, tPos;
UChar ch;
/*--
We assume here that the global array unzftab will
already be holding the frequency counts for
ll8[0 .. last].
--*/
/*-- Set up cftab to facilitate generation of indexIntoF --*/
cftab[0] = 0;
for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
/*-- Make a copy of it, used in generation of T --*/
for (i = 0; i <= 256; i++) cftabAlso[i] = cftab[i];
/*-- compute the T vector --*/
for (i = 0; i <= last; i++) {
ch = (UChar)ll16[i];
SET_LL(i, cftabAlso[ch]);
cftabAlso[ch]++;
}
/*--
Compute T^(-1) by pointer reversal on T. This is rather
subtle, in that, if the original block was two or more
(in general, N) concatenated copies of the same thing,
the T vector will consist of N cycles, each of length
blocksize / N, and decoding will involve traversing one
of these cycles N times. Which particular cycle doesn't
matter -- they are all equivalent. The tricky part is to
make sure that the pointer reversal creates a correct
reversed cycle for us to traverse. So, the code below
simply reverses whatever cycle origPtr happens to fall into,
without regard to the cycle length. That gives one reversed
cycle, which for normal blocks, is the entire block-size long.
For repeated blocks, it will be interspersed with the other
N-1 non-reversed cycles. Providing that the F-subscripting
phase which follows starts at origPtr, all then works ok.
--*/
i = origPtr;
j = GET_LL(i);
do {
tmp = GET_LL(j);
SET_LL(j, i);
i = j;
j = tmp;
}
while (i != origPtr);
/*--
We recreate the original by subscripting F through T^(-1).
The run-length-decoder below requires characters incrementally,
so tPos is set to a starting value, and is updated by
the GET_SMALL macro.
--*/
tPos = origPtr;
/*-------------------------------------------------*/
/*--
This is pretty much a verbatim copy of the
run-length decoder present in the distribution
bzip-0.21; it has to be here to avoid creating
block[] as an intermediary structure. As in 0.21,
this code derives from some sent to me by
Christian von Roques.
It allows dst==NULL, so as to support the test (-t)
option without slowing down the fast decompression
code.
--*/
{
IntNative retVal;
Int32 i2, count, chPrev, ch2;
UInt32 localCrc;
count = 0;
i2 = 0;
ch2 = 256; /*-- not a char and not EOF --*/
localCrc = getGlobalCRC();
{
RAND_DECLS;
while ( i2 <= last ) {
chPrev = ch2;
GET_SMALL(ch2);
if (blockRandomised) {
RAND_UPD_MASK;
ch2 ^= (UInt32)RAND_MASK;
}
i2++;
if (dst)
retVal = putc ( ch2, dst );
UPDATE_CRC ( localCrc, (UChar)ch2 );
if (ch2 != chPrev) {
count = 1;
} else {
count++;
if (count >= 4) {
Int32 j2;
UChar z;
GET_SMALL(z);
if (blockRandomised) {
RAND_UPD_MASK;
z ^= RAND_MASK;
}
for (j2 = 0; j2 < (Int32)z; j2++) {
if (dst) retVal = putc (ch2, dst);
UPDATE_CRC ( localCrc, (UChar)ch2 );
}
i2++;
count = 0;
}
}
}
}
setGlobalCRC ( localCrc );
}
/*-- end of the in-line run-length-decoder. --*/
}
#undef GET_SMALL
/*---------------------------------------------*/
#define GET_FAST(cccc) \
\
cccc = ll8[tPos]; \
tPos = tt[tPos];
void undoReversibleTransformation_fast ( FILE* dst )
{
Int32 cftab[257];
Int32 i, tPos;
UChar ch;
/*--
We assume here that the global array unzftab will
already be holding the frequency counts for
ll8[0 .. last].
--*/
/*-- Set up cftab to facilitate generation of T^(-1) --*/
cftab[0] = 0;
for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
/*-- compute the T^(-1) vector --*/
for (i = 0; i <= last; i++) {
ch = (UChar)ll8[i];
tt[cftab[ch]] = i;
cftab[ch]++;
}
/*--
We recreate the original by subscripting L through T^(-1).
The run-length-decoder below requires characters incrementally,
so tPos is set to a starting value, and is updated by
the GET_FAST macro.
--*/
tPos = tt[origPtr];
/*-------------------------------------------------*/
/*--
This is pretty much a verbatim copy of the
run-length decoder present in the distribution
bzip-0.21; it has to be here to avoid creating
block[] as an intermediary structure. As in 0.21,
this code derives from some sent to me by
Christian von Roques.
--*/
{
IntNative retVal;
Int32 i2, count, chPrev, ch2;
UInt32 localCrc;
count = 0;
i2 = 0;
ch2 = 256; /*-- not a char and not EOF --*/
localCrc = getGlobalCRC();
if (blockRandomised) {
RAND_DECLS;
while ( i2 <= last ) {
chPrev = ch2;
GET_FAST(ch2);
RAND_UPD_MASK;
ch2 ^= (UInt32)RAND_MASK;
i2++;
retVal = putc ( ch2, dst );
UPDATE_CRC ( localCrc, (UChar)ch2 );
if (ch2 != chPrev) {
count = 1;
} else {
count++;
if (count >= 4) {
Int32 j2;
UChar z;
GET_FAST(z);
RAND_UPD_MASK;
z ^= RAND_MASK;
for (j2 = 0; j2 < (Int32)z; j2++) {
retVal = putc (ch2, dst);
UPDATE_CRC ( localCrc, (UChar)ch2 );
}
i2++;
count = 0;
}
}
}
} else {
while ( i2 <= last ) {
chPrev = ch2;
GET_FAST(ch2);
i2++;
retVal = putc ( ch2, dst );
UPDATE_CRC ( localCrc, (UChar)ch2 );
if (ch2 != chPrev) {
count = 1;
} else {
count++;
if (count >= 4) {
Int32 j2;
UChar z;
GET_FAST(z);
for (j2 = 0; j2 < (Int32)z; j2++) {
retVal = putc (ch2, dst);
UPDATE_CRC ( localCrc, (UChar)ch2 );
}
i2++;
count = 0;
}
}
}
} /*-- if (blockRandomised) --*/
setGlobalCRC ( localCrc );
}
/*-- end of the in-line run-length-decoder. --*/
}
#undef GET_FAST
/*---------------------------------------------------*/
/*--- The block loader and RLEr ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
/* Top 16: run length, 1 to 255.
* Lower 16: the char, or MY_EOF for EOF.
*/
#define MY_EOF 257
INLINE Int32 getRLEpair ( FILE* src )
{
Int32 runLength;
IntNative ch, chLatest;
ch = getc ( src );
/*--- Because I have no idea what kind of a value EOF is. ---*/
if (ch == EOF) {
ERROR_IF_NOT_ZERO ( ferror(src));
return (1 << 16) | MY_EOF;
}
runLength = 0;
do {
chLatest = getc ( src );
runLength++;
bytesIn++;
}
while (ch == chLatest && runLength < 255);
if ( chLatest != EOF ) {
if ( ungetc ( chLatest, src ) == EOF )
panic ( "getRLEpair: ungetc failed" );
} else {
ERROR_IF_NOT_ZERO ( ferror(src) );
}
/*--- Conditional is just a speedup hack. ---*/
if (runLength == 1) {
UPDATE_CRC ( globalCrc, (UChar)ch );
return (1 << 16) | ch;
} else {
Int32 i;
for (i = 1; i <= runLength; i++)
UPDATE_CRC ( globalCrc, (UChar)ch );
return (runLength << 16) | ch;
}
}
/*---------------------------------------------*/
void loadAndRLEsource ( FILE* src )
{
Int32 ch, allowableBlockSize, i;
last = -1;
ch = 0;
for (i = 0; i < 256; i++) inUse[i] = False;
/*--- 20 is just a paranoia constant ---*/
allowableBlockSize = 100000 * blockSize100k - 20;
while (last < allowableBlockSize && ch != MY_EOF) {
Int32 rlePair, runLen;
rlePair = getRLEpair ( src );
ch = rlePair & 0xFFFF;
runLen = (UInt32)rlePair >> 16;
#if DEBUG
assert (runLen >= 1 && runLen <= 255);
#endif
if (ch != MY_EOF) {
inUse[ch] = True;
switch (runLen) {
case 1:
last++; block[last] = (UChar)ch; break;
case 2:
last++; block[last] = (UChar)ch;
last++; block[last] = (UChar)ch; break;
case 3:
last++; block[last] = (UChar)ch;
last++; block[last] = (UChar)ch;
last++; block[last] = (UChar)ch; break;
default:
inUse[runLen-4] = True;
last++; block[last] = (UChar)ch;
last++; block[last] = (UChar)ch;
last++; block[last] = (UChar)ch;
last++; block[last] = (UChar)ch;
last++; block[last] = (UChar)(runLen-4); break;
}
}
}
}
/*---------------------------------------------------*/
/*--- Processing of complete files and streams ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
void compressStream ( FILE *stream, FILE *zStream )
{
IntNative retVal;
UInt32 blockCRC, combinedCRC;
Int32 blockNo;
blockNo = 0;
bytesIn = 0;
bytesOut = 0;
nBlocksRandomised = 0;
SET_BINARY_MODE(stream);
SET_BINARY_MODE(zStream);
ERROR_IF_NOT_ZERO ( ferror(stream) );
ERROR_IF_NOT_ZERO ( ferror(zStream) );
bsSetStream ( zStream, True );
/*--- Write `magic' bytes B and Z,
then h indicating file-format == huffmanised,
followed by a digit indicating blockSize100k.
---*/
bsPutUChar ( 'B' );
bsPutUChar ( 'Z' );
bsPutUChar ( 'h' );
bsPutUChar ( '0' + blockSize100k );
combinedCRC = 0;
if (verbosity >= 2) fprintf ( stderr, "\n" );
while (True) {
blockNo++;
initialiseCRC ();
loadAndRLEsource ( stream );
ERROR_IF_NOT_ZERO ( ferror(stream) );
if (last == -1) break;
blockCRC = getFinalCRC ();
combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
combinedCRC ^= blockCRC;
if (verbosity >= 2)
fprintf ( stderr, " block %d: crc = 0x%8x, combined CRC = 0x%8x, size = %d",
blockNo, blockCRC, combinedCRC, last+1 );
/*-- sort the block and establish posn of original string --*/
doReversibleTransformation ();
/*--
A 6-byte block header, the value chosen arbitrarily
as 0x314159265359 :-). A 32 bit value does not really
give a strong enough guarantee that the value will not
appear by chance in the compressed datastream. Worst-case
probability of this event, for a 900k block, is about
2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
For a compressed file of size 100Gb -- about 100000 blocks --
only a 48-bit marker will do. NB: normal compression/
decompression do *not* rely on these statistical properties.
They are only important when trying to recover blocks from
damaged files.
--*/
bsPutUChar ( 0x31 ); bsPutUChar ( 0x41 );
bsPutUChar ( 0x59 ); bsPutUChar ( 0x26 );
bsPutUChar ( 0x53 ); bsPutUChar ( 0x59 );
/*-- Now the block's CRC, so it is in a known place. --*/
bsPutUInt32 ( blockCRC );
/*-- Now a single bit indicating randomisation. --*/
if (blockRandomised) {
bsW(1,1); nBlocksRandomised++;
} else
bsW(1,0);
/*-- Finally, block's contents proper. --*/
moveToFrontCodeAndSend ();
ERROR_IF_NOT_ZERO ( ferror(zStream) );
}
if (verbosity >= 2 && nBlocksRandomised > 0)
fprintf ( stderr, " %d block%s needed randomisation\n",
nBlocksRandomised,
nBlocksRandomised == 1 ? "" : "s" );
/*--
Now another magic 48-bit number, 0x177245385090, to
indicate the end of the last block. (sqrt(pi), if
you want to know. I did want to use e, but it contains
too much repetition -- 27 18 28 18 28 46 -- for me
to feel statistically comfortable. Call me paranoid.)
--*/
bsPutUChar ( 0x17 ); bsPutUChar ( 0x72 );
bsPutUChar ( 0x45 ); bsPutUChar ( 0x38 );
bsPutUChar ( 0x50 ); bsPutUChar ( 0x90 );
bsPutUInt32 ( combinedCRC );
if (verbosity >= 2)
fprintf ( stderr, " final combined CRC = 0x%x\n ", combinedCRC );
/*-- Close the files in an utterly paranoid way. --*/
bsFinishedWithStream ();
ERROR_IF_NOT_ZERO ( ferror(zStream) );
retVal = fflush ( zStream );
ERROR_IF_EOF ( retVal );
retVal = fclose ( zStream );
ERROR_IF_EOF ( retVal );
ERROR_IF_NOT_ZERO ( ferror(stream) );
retVal = fclose ( stream );
ERROR_IF_EOF ( retVal );
if (bytesIn == 0) bytesIn = 1;
if (bytesOut == 0) bytesOut = 1;
if (verbosity >= 1)
fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
"%5.2f%% saved, %d in, %d out.\n",
(float)bytesIn / (float)bytesOut,
(8.0 * (float)bytesOut) / (float)bytesIn,
100.0 * (1.0 - (float)bytesOut / (float)bytesIn),
bytesIn,
bytesOut
);
}
/*---------------------------------------------*/
Bool uncompressStream ( FILE *zStream, FILE *stream )
{
UChar magic1, magic2, magic3, magic4;
UChar magic5, magic6;
UInt32 storedBlockCRC, storedCombinedCRC;
UInt32 computedBlockCRC, computedCombinedCRC;
Int32 currBlockNo;
IntNative retVal;
SET_BINARY_MODE(stream);
SET_BINARY_MODE(zStream);
ERROR_IF_NOT_ZERO ( ferror(stream) );
ERROR_IF_NOT_ZERO ( ferror(zStream) );
bsSetStream ( zStream, False );
/*--
A bad magic number is `recoverable from';
return with False so the caller skips the file.
--*/
magic1 = bsGetUChar ();
magic2 = bsGetUChar ();
magic3 = bsGetUChar ();
magic4 = bsGetUChar ();
if (magic1 != 'B' ||
magic2 != 'Z' ||
magic3 != 'h' ||
magic4 < '1' ||
magic4 > '9') {
bsFinishedWithStream();
retVal = fclose ( stream );
ERROR_IF_EOF ( retVal );
return False;
}
setDecompressStructureSizes ( magic4 - '0' );
computedCombinedCRC = 0;
if (verbosity >= 2) fprintf ( stderr, "\n " );
currBlockNo = 0;
while (True) {
magic1 = bsGetUChar ();
magic2 = bsGetUChar ();
magic3 = bsGetUChar ();
magic4 = bsGetUChar ();
magic5 = bsGetUChar ();
magic6 = bsGetUChar ();
if (magic1 == 0x17 && magic2 == 0x72 &&
magic3 == 0x45 && magic4 == 0x38 &&
magic5 == 0x50 && magic6 == 0x90) break;
if (magic1 != 0x31 || magic2 != 0x41 ||
magic3 != 0x59 || magic4 != 0x26 ||
magic5 != 0x53 || magic6 != 0x59) badBlockHeader();
storedBlockCRC = bsGetUInt32 ();
if (bsR(1) == 1)
blockRandomised = True; else
blockRandomised = False;
currBlockNo++;
if (verbosity >= 2)
fprintf ( stderr, "[%d: huff+mtf ", currBlockNo );
getAndMoveToFrontDecode ();
ERROR_IF_NOT_ZERO ( ferror(zStream) );
initialiseCRC();
if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
if (smallMode)
undoReversibleTransformation_small ( stream );
else
undoReversibleTransformation_fast ( stream );
ERROR_IF_NOT_ZERO ( ferror(stream) );
computedBlockCRC = getFinalCRC();
if (verbosity >= 3)
fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
if (verbosity >= 2) fprintf ( stderr, "] " );
/*-- A bad CRC is considered a fatal error. --*/
if (storedBlockCRC != computedBlockCRC)
crcError ( storedBlockCRC, computedBlockCRC );
computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
computedCombinedCRC ^= computedBlockCRC;
};
if (verbosity >= 2) fprintf ( stderr, "\n " );
storedCombinedCRC = bsGetUInt32 ();
if (verbosity >= 2)
fprintf ( stderr,
"combined CRCs: stored = 0x%x, computed = 0x%x\n ",
storedCombinedCRC, computedCombinedCRC );
if (storedCombinedCRC != computedCombinedCRC)
crcError ( storedCombinedCRC, computedCombinedCRC );
bsFinishedWithStream ();
ERROR_IF_NOT_ZERO ( ferror(zStream) );
retVal = fclose ( zStream );
ERROR_IF_EOF ( retVal );
ERROR_IF_NOT_ZERO ( ferror(stream) );
retVal = fflush ( stream );
ERROR_IF_NOT_ZERO ( retVal );
if (stream != stdout) {
retVal = fclose ( stream );
ERROR_IF_EOF ( retVal );
}
return True;
}
/*---------------------------------------------*/
Bool testStream ( FILE *zStream )
{
UChar magic1, magic2, magic3, magic4;
UChar magic5, magic6;
UInt32 storedBlockCRC, storedCombinedCRC;
UInt32 computedBlockCRC, computedCombinedCRC;
Int32 currBlockNo;
IntNative retVal;
SET_BINARY_MODE(zStream);
ERROR_IF_NOT_ZERO ( ferror(zStream) );
bsSetStream ( zStream, False );
magic1 = bsGetUChar ();
magic2 = bsGetUChar ();
magic3 = bsGetUChar ();
magic4 = bsGetUChar ();
if (magic1 != 'B' ||
magic2 != 'Z' ||
magic3 != 'h' ||
magic4 < '1' ||
magic4 > '9') {
bsFinishedWithStream();
fclose ( zStream );
fprintf ( stderr, "\n%s: bad magic number (ie, not created by bzip2)\n",
inName );
return False;
}
smallMode = True;
setDecompressStructureSizes ( magic4 - '0' );
computedCombinedCRC = 0;
if (verbosity >= 2) fprintf ( stderr, "\n" );
currBlockNo = 0;
while (True) {
magic1 = bsGetUChar ();
magic2 = bsGetUChar ();
magic3 = bsGetUChar ();
magic4 = bsGetUChar ();
magic5 = bsGetUChar ();
magic6 = bsGetUChar ();
if (magic1 == 0x17 && magic2 == 0x72 &&
magic3 == 0x45 && magic4 == 0x38 &&
magic5 == 0x50 && magic6 == 0x90) break;
currBlockNo++;
if (magic1 != 0x31 || magic2 != 0x41 ||
magic3 != 0x59 || magic4 != 0x26 ||
magic5 != 0x53 || magic6 != 0x59) {
bsFinishedWithStream();
fclose ( zStream );
fprintf ( stderr,
"\n%s, block %d: bad header (not == 0x314159265359)\n",
inName, currBlockNo );
return False;
}
storedBlockCRC = bsGetUInt32 ();
if (bsR(1) == 1)
blockRandomised = True; else
blockRandomised = False;
if (verbosity >= 2)
fprintf ( stderr, " block [%d: huff+mtf ", currBlockNo );
getAndMoveToFrontDecode ();
ERROR_IF_NOT_ZERO ( ferror(zStream) );
initialiseCRC();
if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
undoReversibleTransformation_small ( NULL );
computedBlockCRC = getFinalCRC();
if (verbosity >= 3)
fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
if (verbosity >= 2) fprintf ( stderr, "] " );
if (storedBlockCRC != computedBlockCRC) {
bsFinishedWithStream();
fclose ( zStream );
fprintf ( stderr, "\n%s, block %d: computed CRC does not match stored one\n",
inName, currBlockNo );
return False;
}
if (verbosity >= 2) fprintf ( stderr, "ok\n" );
computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
computedCombinedCRC ^= computedBlockCRC;
};
storedCombinedCRC = bsGetUInt32 ();
if (verbosity >= 2)
fprintf ( stderr,
" combined CRCs: stored = 0x%x, computed = 0x%x\n ",
storedCombinedCRC, computedCombinedCRC );
if (storedCombinedCRC != computedCombinedCRC) {
bsFinishedWithStream();
fclose ( zStream );
fprintf ( stderr, "\n%s: computed CRC does not match stored one\n",
inName );
return False;
}
bsFinishedWithStream ();
ERROR_IF_NOT_ZERO ( ferror(zStream) );
retVal = fclose ( zStream );
ERROR_IF_EOF ( retVal );
return True;
}
/*---------------------------------------------------*/
/*--- Error [non-] handling grunge ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
void cadvise ( void )
{
fprintf (
stderr,
"\nIt is possible that the compressed file(s) have become corrupted.\n"
"You can use the -tvv option to test integrity of such files.\n\n"
"You can use the `bzip2recover' program to *attempt* to recover\n"
"data from undamaged sections of corrupted files.\n\n"
);
}
/*---------------------------------------------*/
void showFileNames ( void )
{
fprintf (
stderr,
"\tInput file = %s, output file = %s\n",
inName==NULL ? "(null)" : inName,
outName==NULL ? "(null)" : outName
);
}
/*---------------------------------------------*/
void cleanUpAndFail ( Int32 ec )
{
IntNative retVal;
if ( srcMode == SM_F2F && opMode != OM_TEST ) {
fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
progName,
outName==NULL ? "(null)" : outName );
if (outputHandleJustInCase != NULL)
fclose ( outputHandleJustInCase );
retVal = remove ( outName );
if (retVal != 0)
fprintf ( stderr,
"%s: WARNING: deletion of output file (apparently) failed.\n",
progName );
}
if (numFileNames > 0 && numFilesProcessed < numFileNames) {
fprintf ( stderr,
"%s: WARNING: some files have not been processed:\n"
"\t%d specified on command line, %d not processed yet.\n\n",
progName, numFileNames,
numFileNames - numFilesProcessed );
}
exit ( ec );
}
/*---------------------------------------------*/
void panic ( Char* s )
{
fprintf ( stderr,
"\n%s: PANIC -- internal consistency error:\n"
"\t%s\n"
"\tThis is a BUG. Please report it to me at:\n"
"\tjseward@acm.org\n",
progName, s );
showFileNames();
cleanUpAndFail( 3 );
}
/*---------------------------------------------*/
void badBGLengths ( void )
{
fprintf ( stderr,
"\n%s: error when reading background model code lengths,\n"
"\twhich probably means the compressed file is corrupted.\n",
progName );
showFileNames();
cadvise();
cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
void crcError ( UInt32 crcStored, UInt32 crcComputed )
{
fprintf ( stderr,
"\n%s: Data integrity error when decompressing.\n"
"\tStored CRC = 0x%x, computed CRC = 0x%x\n",
progName, crcStored, crcComputed );
showFileNames();
cadvise();
cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
void compressedStreamEOF ( void )
{
fprintf ( stderr,
"\n%s: Compressed file ends unexpectedly;\n\t"
"perhaps it is corrupted? *Possible* reason follows.\n",
progName );
perror ( progName );
showFileNames();
cadvise();
cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
void ioError ( )
{
fprintf ( stderr,
"\n%s: I/O or other error, bailing out. Possible reason follows.\n",
progName );
perror ( progName );
showFileNames();
cleanUpAndFail( 1 );
}
/*---------------------------------------------*/
void blockOverrun ()
{
fprintf ( stderr,
"\n%s: block overrun during decompression,\n"
"\twhich probably means the compressed file\n"
"\tis corrupted.\n",
progName );
showFileNames();
cadvise();
cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
void badBlockHeader ()
{
fprintf ( stderr,
"\n%s: bad block header in the compressed file,\n"
"\twhich probably means it is corrupted.\n",
progName );
showFileNames();
cadvise();
cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
void bitStreamEOF ()
{
fprintf ( stderr,
"\n%s: read past the end of compressed data,\n"
"\twhich probably means it is corrupted.\n",
progName );
showFileNames();
cadvise();
cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
void mySignalCatcher ( IntNative n )
{
fprintf ( stderr,
"\n%s: Control-C (or similar) caught, quitting.\n",
progName );
cleanUpAndFail(1);
}
/*---------------------------------------------*/
void mySIGSEGVorSIGBUScatcher ( IntNative n )
{
if (opMode == OM_Z)
fprintf ( stderr,
"\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
"\twhich probably indicates a bug in bzip2. Please\n"
"\treport it to me at: jseward@acm.org\n",
progName );
else
fprintf ( stderr,
"\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
"\twhich probably indicates that the compressed data\n"
"\tis corrupted.\n",
progName );
showFileNames();
if (opMode == OM_Z)
cleanUpAndFail( 3 ); else
{ cadvise(); cleanUpAndFail( 2 ); }
}
/*---------------------------------------------*/
void uncompressOutOfMemory ( Int32 draw, Int32 blockSize )
{
fprintf ( stderr,
"\n%s: Can't allocate enough memory for decompression.\n"
"\tRequested %d bytes for a block size of %d.\n"
"\tTry selecting space-economic decompress (with flag -s)\n"
"\tand failing that, find a machine with more memory.\n",
progName, draw, blockSize );
showFileNames();
cleanUpAndFail(1);
}
/*---------------------------------------------*/
void compressOutOfMemory ( Int32 draw, Int32 blockSize )
{
fprintf ( stderr,
"\n%s: Can't allocate enough memory for compression.\n"
"\tRequested %d bytes for a block size of %d.\n"
"\tTry selecting a small block size (with flag -s).\n",
progName, draw, blockSize );
showFileNames();
cleanUpAndFail(1);
}
/*---------------------------------------------------*/
/*--- The main driver machinery ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
void pad ( Char *s )
{
Int32 i;
if ( (Int32)strlen(s) >= longestFileName ) return;
for (i = 1; i <= longestFileName - (Int32)strlen(s); i++)
fprintf ( stderr, " " );
}
/*---------------------------------------------*/
Bool fileExists ( Char* name )
{
FILE *tmp = fopen ( name, "rb" );
Bool exists = (tmp != NULL);
if (tmp != NULL) fclose ( tmp );
return exists;
}
/*---------------------------------------------*/
/*--
if in doubt, return True
--*/
Bool notABogStandardFile ( Char* name )
{
IntNative i;
struct MY_STAT statBuf;
i = MY_LSTAT ( name, &statBuf );
if (i != 0) return True;
if (MY_S_IFREG(statBuf.st_mode)) return False;
return True;
}
/*---------------------------------------------*/
void copyDateAndPermissions ( Char *srcName, Char *dstName )
{
#if BZ_UNIX
IntNative retVal;
struct MY_STAT statBuf;
struct utimbuf uTimBuf;
retVal = MY_LSTAT ( srcName, &statBuf );
ERROR_IF_NOT_ZERO ( retVal );
uTimBuf.actime = statBuf.st_atime;
uTimBuf.modtime = statBuf.st_mtime;
retVal = chmod ( dstName, statBuf.st_mode );
ERROR_IF_NOT_ZERO ( retVal );
retVal = utime ( dstName, &uTimBuf );
ERROR_IF_NOT_ZERO ( retVal );
#endif
}
/*---------------------------------------------*/
Bool endsInBz2 ( Char* name )
{
Int32 n = strlen ( name );
if (n <= 4) return False;
return
(name[n-4] == '.' &&
name[n-3] == 'b' &&
name[n-2] == 'z' &&
name[n-1] == '2');
}
/*---------------------------------------------*/
Bool containsDubiousChars ( Char* name )
{
Bool cdc = False;
for (; *name != '\0'; name++)
if (*name == '?' || *name == '*') cdc = True;
return cdc;
}
/*---------------------------------------------*/
void compress ( Char *name )
{
FILE *inStr;
FILE *outStr;
if (name == NULL && srcMode != SM_I2O)
panic ( "compress: bad modes\n" );
switch (srcMode) {
case SM_I2O: strcpy ( inName, "(stdin)" );
strcpy ( outName, "(stdout)" ); break;
case SM_F2F: strcpy ( inName, name );
strcpy ( outName, name );
strcat ( outName, ".bz2" ); break;
case SM_F2O: strcpy ( inName, name );
strcpy ( outName, "(stdout)" ); break;
}
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && endsInBz2 ( inName )) {
fprintf ( stderr, "%s: Input file name %s ends in `.bz2', skipping.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
progName, inName );
return;
}
if ( srcMode == SM_F2F && fileExists ( outName ) ) {
fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
progName, outName );
return;
}
switch ( srcMode ) {
case SM_I2O:
inStr = stdin;
outStr = stdout;
if ( isatty ( fileno ( stdout ) ) ) {
fprintf ( stderr,
"%s: I won't write compressed data to a terminal.\n",
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
return;
};
break;
case SM_F2O:
inStr = fopen ( inName, "rb" );
outStr = stdout;
if ( isatty ( fileno ( stdout ) ) ) {
fprintf ( stderr,
"%s: I won't write compressed data to a terminal.\n",
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
return;
};
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
progName, inName );
return;
};
break;
case SM_F2F:
inStr = fopen ( inName, "rb" );
outStr = fopen ( outName, "wb" );
if ( outStr == NULL) {
fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
progName, outName );
return;
}
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
progName, inName );
return;
};
break;
default:
panic ( "compress: bad srcMode" );
break;
}
if (verbosity >= 1) {
fprintf ( stderr, " %s: ", inName );
pad ( inName );
fflush ( stderr );
}
/*--- Now the input and output handles are sane. Do the Biz. ---*/
outputHandleJustInCase = outStr;
compressStream ( inStr, outStr );
outputHandleJustInCase = NULL;
/*--- If there was an I/O error, we won't get here. ---*/
if ( srcMode == SM_F2F ) {
copyDateAndPermissions ( inName, outName );
if ( !keepInputFiles ) {
IntNative retVal = remove ( inName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
}
/*---------------------------------------------*/
void uncompress ( Char *name )
{
FILE *inStr;
FILE *outStr;
Bool magicNumberOK;
if (name == NULL && srcMode != SM_I2O)
panic ( "uncompress: bad modes\n" );
switch (srcMode) {
case SM_I2O: strcpy ( inName, "(stdin)" );
strcpy ( outName, "(stdout)" ); break;
case SM_F2F: strcpy ( inName, name );
strcpy ( outName, name );
if (endsInBz2 ( outName ))
outName [ strlen ( outName ) - 4 ] = '\0';
break;
case SM_F2O: strcpy ( inName, name );
strcpy ( outName, "(stdout)" ); break;
}
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
fprintf ( stderr,
"%s: Input file name %s doesn't end in `.bz2', skipping.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
progName, inName );
return;
}
if ( srcMode == SM_F2F && fileExists ( outName ) ) {
fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
progName, outName );
return;
}
switch ( srcMode ) {
case SM_I2O:
inStr = stdin;
outStr = stdout;
if ( isatty ( fileno ( stdin ) ) ) {
fprintf ( stderr,
"%s: I won't read compressed data from a terminal.\n",
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
return;
};
break;
case SM_F2O:
inStr = fopen ( inName, "rb" );
outStr = stdout;
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
progName, inName );
return;
};
break;
case SM_F2F:
inStr = fopen ( inName, "rb" );
outStr = fopen ( outName, "wb" );
if ( outStr == NULL) {
fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
progName, outName );
return;
}
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
progName, inName );
return;
};
break;
default:
panic ( "uncompress: bad srcMode" );
break;
}
if (verbosity >= 1) {
fprintf ( stderr, " %s: ", inName );
pad ( inName );
fflush ( stderr );
}
/*--- Now the input and output handles are sane. Do the Biz. ---*/
outputHandleJustInCase = outStr;
magicNumberOK = uncompressStream ( inStr, outStr );
outputHandleJustInCase = NULL;
/*--- If there was an I/O error, we won't get here. ---*/
if ( magicNumberOK ) {
if ( srcMode == SM_F2F ) {
copyDateAndPermissions ( inName, outName );
if ( !keepInputFiles ) {
IntNative retVal = remove ( inName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
} else {
if ( srcMode == SM_F2F ) {
IntNative retVal = remove ( outName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
if ( magicNumberOK ) {
if (verbosity >= 1)
fprintf ( stderr, "done\n" );
} else {
if (verbosity >= 1)
fprintf ( stderr, "not a bzip2 file, skipping.\n" ); else
fprintf ( stderr,
"%s: %s is not a bzip2 file, skipping.\n",
progName, inName );
}
}
/*---------------------------------------------*/
void testf ( Char *name )
{
FILE *inStr;
Bool allOK;
if (name == NULL && srcMode != SM_I2O)
panic ( "testf: bad modes\n" );
strcpy ( outName, "(none)" );
switch (srcMode) {
case SM_I2O: strcpy ( inName, "(stdin)" ); break;
case SM_F2F: strcpy ( inName, name ); break;
case SM_F2O: strcpy ( inName, name ); break;
}
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
fprintf ( stderr,
"%s: Input file name %s doesn't end in `.bz2', skipping.\n",
progName, inName );
return;
}
if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
progName, inName );
return;
}
switch ( srcMode ) {
case SM_I2O:
if ( isatty ( fileno ( stdin ) ) ) {
fprintf ( stderr,
"%s: I won't read compressed data from a terminal.\n",
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
return;
};
inStr = stdin;
break;
case SM_F2O: case SM_F2F:
inStr = fopen ( inName, "rb" );
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
progName, inName );
return;
};
break;
default:
panic ( "testf: bad srcMode" );
break;
}
if (verbosity >= 1) {
fprintf ( stderr, " %s: ", inName );
pad ( inName );
fflush ( stderr );
}
/*--- Now the input handle is sane. Do the Biz. ---*/
allOK = testStream ( inStr );
if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" );
if (!allOK) testFailsExist = True;
}
/*---------------------------------------------*/
void license ( void )
{
fprintf ( stderr,
"bzip2, a block-sorting file compressor. "
"Version 0.1pl2, 29-Aug-97.\n"
" \n"
" Copyright (C) 1996, 1997 by Julian Seward.\n"
" \n"
" This program is free software; you can redistribute it and/or modify\n"
" it under the terms of the GNU General Public License as published by\n"
" the Free Software Foundation; either version 2 of the License, or\n"
" (at your option) any later version.\n"
" \n"
" This program is distributed in the hope that it will be useful,\n"
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" GNU General Public License for more details.\n"
" \n"
" You should have received a copy of the GNU General Public License\n"
" along with this program; if not, write to the Free Software\n"
" Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
" \n"
" The GNU General Public License is contained in the file LICENSE.\n"
" \n"
);
}
/*---------------------------------------------*/
void usage ( Char *fullProgName )
{
fprintf (
stderr,
"bzip2, a block-sorting file compressor. "
"Version 0.1pl2, 29-Aug-97.\n"
"\n usage: %s [flags and input files in any order]\n"
"\n"
" -h --help print this message\n"
" -d --decompress force decompression\n"
" -f --compress force compression\n"
" -t --test test compressed file integrity\n"
" -c --stdout output to standard out\n"
" -v --verbose be verbose (a 2nd -v gives more)\n"
" -k --keep keep (don't delete) input files\n"
" -L --license display software version & license\n"
" -V --version display software version & license\n"
" -s --small use less memory (at most 2500k)\n"
" -1 .. -9 set block size to 100k .. 900k\n"
" --repetitive-fast compress repetitive blocks faster\n"
" --repetitive-best compress repetitive blocks better\n"
"\n"
" If invoked as `bzip2', the default action is to compress.\n"
" as `bunzip2', the default action is to decompress.\n"
"\n"
" If no file names are given, bzip2 compresses or decompresses\n"
" from standard input to standard output. You can combine\n"
" flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
#if BZ_UNIX
"\n"
#endif
,
fullProgName
);
}
/*---------------------------------------------*/
/*--
All the garbage from here to main() is purely to
implement a linked list of command-line arguments,
into which main() copies argv[1 .. argc-1].
The purpose of this ridiculous exercise is to
facilitate the expansion of wildcard characters
* and ? in filenames for halfwitted OSs like
MSDOS, Windows 95 and NT.
The actual Dirty Work is done by the platform-specific
macro APPEND_FILESPEC.
--*/
typedef
struct zzzz {
Char *name;
struct zzzz *link;
}
Cell;
/*---------------------------------------------*/
void *myMalloc ( Int32 n )
{
void* p;
p = malloc ( (size_t)n );
if (p == NULL) {
fprintf (
stderr,
"%s: `malloc' failed on request for %d bytes.\n",
progName, n
);
exit ( 1 );
}
return p;
}
/*---------------------------------------------*/
Cell *mkCell ( void )
{
Cell *c;
c = (Cell*) myMalloc ( sizeof ( Cell ) );
c->name = NULL;
c->link = NULL;
return c;
}
/*---------------------------------------------*/
Cell *snocString ( Cell *root, Char *name )
{
if (root == NULL) {
Cell *tmp = mkCell();
tmp->name = (Char*) myMalloc ( 5 + strlen(name) );
strcpy ( tmp->name, name );
return tmp;
} else {
Cell *tmp = root;
while (tmp->link != NULL) tmp = tmp->link;
tmp->link = snocString ( tmp->link, name );
return root;
}
}
/*---------------------------------------------*/
#define ISFLAG(s) (strcmp(aa->name, (s))==0)
IntNative main ( IntNative argc, Char *argv[] )
{
Int32 i, j;
Char *tmp;
Cell *argList;
Cell *aa;
#if DEBUG
fprintf ( stderr, "bzip2: *** compiled with debugging ON ***\n" );
#endif
/*-- Be really really really paranoid :-) --*/
if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
sizeof(Char) != 1 || sizeof(UChar) != 1) {
fprintf ( stderr,
"bzip2: I'm not configured correctly for this platform!\n"
"\tI require Int32, Int16 and Char to have sizes\n"
"\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
"\tProbably you can fix this by defining them correctly,\n"
"\tand recompiling. Bye!\n" );
exit(1);
}
/*-- Set up signal handlers --*/
signal (SIGINT, mySignalCatcher);
signal (SIGTERM, mySignalCatcher);
signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
#if BZ_UNIX
signal (SIGHUP, mySignalCatcher);
signal (SIGBUS, mySIGSEGVorSIGBUScatcher);
#endif
/*-- Initialise --*/
outputHandleJustInCase = NULL;
ftab = NULL;
ll4 = NULL;
ll16 = NULL;
ll8 = NULL;
tt = NULL;
block = NULL;
zptr = NULL;
smallMode = False;
keepInputFiles = False;
verbosity = 0;
blockSize100k = 9;
testFailsExist = False;
bsStream = NULL;
numFileNames = 0;
numFilesProcessed = 0;
workFactor = 30;
strcpy ( inName, "(none)" );
strcpy ( outName, "(none)" );
strcpy ( progNameReally, argv[0] );
progName = &progNameReally[0];
for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++)
if (*tmp == PATH_SEP) progName = tmp + 1;
/*-- Expand filename wildcards in arg list --*/
argList = NULL;
for (i = 1; i <= argc-1; i++)
APPEND_FILESPEC(argList, argv[i]);
/*-- Find the length of the longest filename --*/
longestFileName = 7;
numFileNames = 0;
for (aa = argList; aa != NULL; aa = aa->link)
if (aa->name[0] != '-') {
numFileNames++;
if (longestFileName < (Int32)strlen(aa->name) )
longestFileName = (Int32)strlen(aa->name);
}
/*-- Determine what to do (compress/uncompress/test). --*/
/*-- Note that subsequent flag handling may change this. --*/
opMode = OM_Z;
if ( (strcmp ( "bunzip2", progName ) == 0) ||
(strcmp ( "BUNZIP2", progName ) == 0) ||
(strcmp ( "bunzip2.exe", progName ) == 0) ||
(strcmp ( "BUNZIP2.EXE", progName ) == 0) )
opMode = OM_UNZ;
/*-- Determine source modes; flag handling may change this too. --*/
if (numFileNames == 0)
srcMode = SM_I2O; else srcMode = SM_F2F;
/*-- Look at the flags. --*/
for (aa = argList; aa != NULL; aa = aa->link)
if (aa->name[0] == '-' && aa->name[1] != '-')
for (j = 1; aa->name[j] != '\0'; j++)
switch (aa->name[j]) {
case 'c': srcMode = SM_F2O; break;
case 'd': opMode = OM_UNZ; break;
case 'f': opMode = OM_Z; break;
case 't': opMode = OM_TEST; break;
case 'k': keepInputFiles = True; break;
case 's': smallMode = True; break;
case '1': blockSize100k = 1; break;
case '2': blockSize100k = 2; break;
case '3': blockSize100k = 3; break;
case '4': blockSize100k = 4; break;
case '5': blockSize100k = 5; break;
case '6': blockSize100k = 6; break;
case '7': blockSize100k = 7; break;
case '8': blockSize100k = 8; break;
case '9': blockSize100k = 9; break;
case 'V':
case 'L': license(); break;
case 'v': verbosity++; break;
case 'h': usage ( progName );
exit ( 1 );
break;
default: fprintf ( stderr, "%s: Bad flag `%s'\n",
progName, aa->name );
usage ( progName );
exit ( 1 );
break;
}
/*-- And again ... --*/
for (aa = argList; aa != NULL; aa = aa->link) {
if (ISFLAG("--stdout")) srcMode = SM_F2O; else
if (ISFLAG("--decompress")) opMode = OM_UNZ; else
if (ISFLAG("--compress")) opMode = OM_Z; else
if (ISFLAG("--test")) opMode = OM_TEST; else
if (ISFLAG("--keep")) keepInputFiles = True; else
if (ISFLAG("--small")) smallMode = True; else
if (ISFLAG("--version")) license(); else
if (ISFLAG("--license")) license(); else
if (ISFLAG("--repetitive-fast")) workFactor = 5; else
if (ISFLAG("--repetitive-best")) workFactor = 150; else
if (ISFLAG("--verbose")) verbosity++; else
if (ISFLAG("--help")) { usage ( progName ); exit ( 1 ); }
else
if (strncmp ( aa->name, "--", 2) == 0) {
fprintf ( stderr, "%s: Bad flag `%s'\n", progName, aa->name );
usage ( progName );
exit ( 1 );
}
}
if (opMode == OM_Z && smallMode) blockSize100k = 2;
if (opMode == OM_Z && srcMode == SM_F2O && numFileNames > 1) {
fprintf ( stderr, "%s: I won't compress multiple files to stdout.\n",
progName );
exit ( 1 );
}
if (srcMode == SM_F2O && numFileNames == 0) {
fprintf ( stderr, "%s: -c expects at least one filename.\n",
progName );
exit ( 1 );
}
if (opMode == OM_TEST && srcMode == SM_F2O) {
fprintf ( stderr, "%s: -c and -t cannot be used together.\n",
progName );
exit ( 1 );
}
if (opMode != OM_Z) blockSize100k = 0;
if (opMode == OM_Z) {
allocateCompressStructures();
if (srcMode == SM_I2O)
compress ( NULL );
else
for (aa = argList; aa != NULL; aa = aa->link)
if (aa->name[0] != '-') {
numFilesProcessed++;
compress ( aa->name );
}
} else
if (opMode == OM_UNZ) {
if (srcMode == SM_I2O)
uncompress ( NULL );
else
for (aa = argList; aa != NULL; aa = aa->link)
if (aa->name[0] != '-') {
numFilesProcessed++;
uncompress ( aa->name );
}
} else {
testFailsExist = False;
if (srcMode == SM_I2O)
testf ( NULL );
else
for (aa = argList; aa != NULL; aa = aa->link)
if (aa->name[0] != '-') {
numFilesProcessed++;
testf ( aa->name );
}
if (testFailsExist) {
fprintf ( stderr,
"\n"
"You can use the `bzip2recover' program to *attempt* to recover\n"
"data from undamaged sections of corrupted files.\n\n"
);
exit(2);
}
}
return 0;
}
/*-----------------------------------------------------------*/
/*--- end bzip2.c ---*/
/*-----------------------------------------------------------*/