mirror of
git://sourceware.org/git/bzip2.git
synced 2024-11-23 19:53:27 +08:00
4045 lines
115 KiB
C
4045 lines
115 KiB
C
|
|
/*-----------------------------------------------------------*/
|
|
/*--- A block-sorting, lossless compressor bzip2.c ---*/
|
|
/*-----------------------------------------------------------*/
|
|
|
|
/*--
|
|
This program is bzip2, a lossless, block-sorting data compressor,
|
|
version 0.1pl2, dated 29-Aug-1997.
|
|
|
|
Copyright (C) 1996, 1997 by Julian Seward.
|
|
Guildford, Surrey, UK
|
|
email: jseward@acm.org
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
The GNU General Public License is contained in the file LICENSE.
|
|
|
|
This program is based on (at least) the work of:
|
|
Mike Burrows
|
|
David Wheeler
|
|
Peter Fenwick
|
|
Alistair Moffat
|
|
Radford Neal
|
|
Ian H. Witten
|
|
Robert Sedgewick
|
|
Jon L. Bentley
|
|
|
|
For more information on these sources, see the file ALGORITHMS.
|
|
--*/
|
|
|
|
/*----------------------------------------------------*/
|
|
/*--- IMPORTANT ---*/
|
|
/*----------------------------------------------------*/
|
|
|
|
/*--
|
|
WARNING:
|
|
This program (attempts to) compress data by performing several
|
|
non-trivial transformations on it. Unless you are 100% familiar
|
|
with *all* the algorithms contained herein, and with the
|
|
consequences of modifying them, you should NOT meddle with the
|
|
compression or decompression machinery. Incorrect changes can
|
|
and very likely *will* lead to disastrous loss of data.
|
|
|
|
DISCLAIMER:
|
|
I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
|
|
USE OF THIS PROGRAM, HOWSOEVER CAUSED.
|
|
|
|
Every compression of a file implies an assumption that the
|
|
compressed file can be decompressed to reproduce the original.
|
|
Great efforts in design, coding and testing have been made to
|
|
ensure that this program works correctly. However, the
|
|
complexity of the algorithms, and, in particular, the presence
|
|
of various special cases in the code which occur with very low
|
|
but non-zero probability make it impossible to rule out the
|
|
possibility of bugs remaining in the program. DO NOT COMPRESS
|
|
ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE
|
|
POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
|
|
|
|
That is not to say this program is inherently unreliable.
|
|
Indeed, I very much hope the opposite is true. bzip2 has been
|
|
carefully constructed and extensively tested.
|
|
|
|
PATENTS:
|
|
To the best of my knowledge, bzip2 does not use any patented
|
|
algorithms. However, I do not have the resources available to
|
|
carry out a full patent search. Therefore I cannot give any
|
|
guarantee of the above statement.
|
|
--*/
|
|
|
|
|
|
|
|
/*----------------------------------------------------*/
|
|
/*--- and now for something much more pleasant :-) ---*/
|
|
/*----------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Place a 1 beside your platform, and 0 elsewhere.
|
|
--*/
|
|
|
|
/*--
|
|
Generic 32-bit Unix.
|
|
Also works on 64-bit Unix boxes.
|
|
--*/
|
|
#define BZ_UNIX 1
|
|
|
|
/*--
|
|
Win32, as seen by Jacob Navia's excellent
|
|
port of (Chris Fraser & David Hanson)'s excellent
|
|
lcc compiler.
|
|
--*/
|
|
#define BZ_LCCWIN32 0
|
|
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Some stuff for all platforms.
|
|
--*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#if DEBUG
|
|
#include <assert.h>
|
|
#endif
|
|
#include <string.h>
|
|
#include <signal.h>
|
|
#include <math.h>
|
|
|
|
#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); }
|
|
#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); }
|
|
#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); }
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Platform-specific stuff.
|
|
--*/
|
|
|
|
#if BZ_UNIX
|
|
#include <sys/types.h>
|
|
#include <utime.h>
|
|
#include <unistd.h>
|
|
#include <malloc.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/times.h>
|
|
|
|
#define Int32 int
|
|
#define UInt32 unsigned int
|
|
#define Char char
|
|
#define UChar unsigned char
|
|
#define Int16 short
|
|
#define UInt16 unsigned short
|
|
|
|
#define PATH_SEP '/'
|
|
#define MY_LSTAT lstat
|
|
#define MY_S_IFREG S_ISREG
|
|
#define MY_STAT stat
|
|
|
|
#define APPEND_FILESPEC(root, name) \
|
|
root=snocString((root), (name))
|
|
|
|
#define SET_BINARY_MODE(fd) /**/
|
|
|
|
/*--
|
|
You should try very hard to persuade your C compiler
|
|
to inline the bits marked INLINE. Otherwise bzip2 will
|
|
run rather slowly. gcc version 2.x is recommended.
|
|
--*/
|
|
#ifdef __GNUC__
|
|
#define INLINE inline
|
|
#define NORETURN __attribute__ ((noreturn))
|
|
#else
|
|
#define INLINE /**/
|
|
#define NORETURN /**/
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
#if BZ_LCCWIN32
|
|
#include <io.h>
|
|
#include <fcntl.h>
|
|
#include <sys\stat.h>
|
|
|
|
#define Int32 int
|
|
#define UInt32 unsigned int
|
|
#define Int16 short
|
|
#define UInt16 unsigned short
|
|
#define Char char
|
|
#define UChar unsigned char
|
|
|
|
#define INLINE /**/
|
|
#define NORETURN /**/
|
|
#define PATH_SEP '\\'
|
|
#define MY_LSTAT _stat
|
|
#define MY_STAT _stat
|
|
#define MY_S_IFREG(x) ((x) & _S_IFREG)
|
|
|
|
#if 0
|
|
/*-- lcc-win32 seems to expand wildcards itself --*/
|
|
#define APPEND_FILESPEC(root, spec) \
|
|
do { \
|
|
if ((spec)[0] == '-') { \
|
|
root = snocString((root), (spec)); \
|
|
} else { \
|
|
struct _finddata_t c_file; \
|
|
long hFile; \
|
|
hFile = _findfirst((spec), &c_file); \
|
|
if ( hFile == -1L ) { \
|
|
root = snocString ((root), (spec)); \
|
|
} else { \
|
|
int anInt = 0; \
|
|
while ( anInt == 0 ) { \
|
|
root = snocString((root), \
|
|
&c_file.name[0]); \
|
|
anInt = _findnext(hFile, &c_file); \
|
|
} \
|
|
} \
|
|
} \
|
|
} while ( 0 )
|
|
#else
|
|
#define APPEND_FILESPEC(root, name) \
|
|
root = snocString ((root), (name))
|
|
#endif
|
|
|
|
#define SET_BINARY_MODE(fd) \
|
|
do { \
|
|
int retVal = setmode ( fileno ( fd ), \
|
|
O_BINARY ); \
|
|
ERROR_IF_MINUS_ONE ( retVal ); \
|
|
} while ( 0 )
|
|
|
|
#endif
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Some more stuff for all platforms :-)
|
|
--*/
|
|
|
|
#define Bool unsigned char
|
|
#define True 1
|
|
#define False 0
|
|
|
|
/*--
|
|
IntNative is your platform's `native' int size.
|
|
Only here to avoid probs with 64-bit platforms.
|
|
--*/
|
|
#define IntNative int
|
|
|
|
|
|
/*--
|
|
change to 1, or compile with -DDEBUG=1 to debug
|
|
--*/
|
|
#ifndef DEBUG
|
|
#define DEBUG 0
|
|
#endif
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*--
|
|
Implementation notes, July 1997
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Memory allocation
|
|
~~~~~~~~~~~~~~~~~
|
|
All large data structures are allocated on the C heap,
|
|
for better or for worse. That includes the various
|
|
arrays of pointers, striped words, bytes, frequency
|
|
tables and buffers for compression and decompression.
|
|
|
|
bzip2 can operate at various block-sizes, ranging from
|
|
100k to 900k in 100k steps, and it allocates only as
|
|
much as it needs to. When compressing, we know from the
|
|
command-line options what the block-size is going to be,
|
|
so all allocation can be done at start-up; if that
|
|
succeeds, there can be no further allocation problems.
|
|
|
|
Decompression is more complicated. Each compressed file
|
|
contains, in its header, a byte indicating the block
|
|
size used for compression. This means bzip2 potentially
|
|
needs to reallocate memory for each file it deals with,
|
|
which in turn opens the possibility for a memory allocation
|
|
failure part way through a run of files, by encountering
|
|
a file requiring a much larger block size than all the
|
|
ones preceding it.
|
|
|
|
The policy is to simply give up if a memory allocation
|
|
failure occurs. During decompression, it would be
|
|
possible to move on to subsequent files in the hope that
|
|
some might ask for a smaller block size, but the
|
|
complications for doing this seem more trouble than they
|
|
are worth.
|
|
|
|
|
|
Compressed file formats
|
|
~~~~~~~~~~~~~~~~~~~~~~~
|
|
[This is now entirely different from both 0.21, and from
|
|
any previous Huffman-coded variant of bzip.
|
|
See the associated file bzip2.txt for details.]
|
|
|
|
|
|
Error conditions
|
|
~~~~~~~~~~~~~~~~
|
|
Dealing with error conditions is the least satisfactory
|
|
aspect of bzip2. The policy is to try and leave the
|
|
filesystem in a consistent state, then quit, even if it
|
|
means not processing some of the files mentioned in the
|
|
command line. `A consistent state' means that a file
|
|
exists either in its compressed or uncompressed form,
|
|
but not both. This boils down to the rule `delete the
|
|
output file if an error condition occurs, leaving the
|
|
input intact'. Input files are only deleted when we can
|
|
be pretty sure the output file has been written and
|
|
closed successfully.
|
|
|
|
Errors are a dog because there's so many things to
|
|
deal with. The following can happen mid-file, and
|
|
require cleaning up.
|
|
|
|
internal `panics' -- indicating a bug
|
|
corrupted or inconsistent compressed file
|
|
can't allocate enough memory to decompress this file
|
|
I/O error reading/writing/opening/closing
|
|
signal catches -- Control-C, SIGTERM, SIGHUP.
|
|
|
|
Other conditions, primarily pertaining to file names,
|
|
can be checked in-between files, which makes dealing
|
|
with them easier.
|
|
--*/
|
|
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Misc (file handling) data decls ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
UInt32 bytesIn, bytesOut;
|
|
Int32 verbosity;
|
|
Bool keepInputFiles, smallMode, testFailsExist;
|
|
UInt32 globalCrc;
|
|
Int32 numFileNames, numFilesProcessed;
|
|
|
|
|
|
/*-- source modes; F==file, I==stdin, O==stdout --*/
|
|
#define SM_I2O 1
|
|
#define SM_F2O 2
|
|
#define SM_F2F 3
|
|
|
|
/*-- operation modes --*/
|
|
#define OM_Z 1
|
|
#define OM_UNZ 2
|
|
#define OM_TEST 3
|
|
|
|
Int32 opMode;
|
|
Int32 srcMode;
|
|
|
|
|
|
Int32 longestFileName;
|
|
Char inName[1024];
|
|
Char outName[1024];
|
|
Char *progName;
|
|
Char progNameReally[1024];
|
|
FILE *outputHandleJustInCase;
|
|
|
|
void panic ( Char* ) NORETURN;
|
|
void ioError ( void ) NORETURN;
|
|
void compressOutOfMemory ( Int32, Int32 ) NORETURN;
|
|
void uncompressOutOfMemory ( Int32, Int32 ) NORETURN;
|
|
void blockOverrun ( void ) NORETURN;
|
|
void badBlockHeader ( void ) NORETURN;
|
|
void badBGLengths ( void ) NORETURN;
|
|
void crcError ( UInt32, UInt32 ) NORETURN;
|
|
void bitStreamEOF ( void ) NORETURN;
|
|
void cleanUpAndFail ( Int32 ) NORETURN;
|
|
void compressedStreamEOF ( void ) NORETURN;
|
|
|
|
void* myMalloc ( Int32 );
|
|
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Data decls for the front end ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*--
|
|
The overshoot bytes allow us to avoid most of
|
|
the cost of pointer renormalisation during
|
|
comparison of rotations in sorting.
|
|
The figure of 20 is derived as follows:
|
|
qSort3 allows an overshoot of up to 10.
|
|
It then calls simpleSort, which calls
|
|
fullGtU, also with max overshoot 10.
|
|
fullGtU does up to 10 comparisons without
|
|
renormalising, giving 10+10 == 20.
|
|
--*/
|
|
#define NUM_OVERSHOOT_BYTES 20
|
|
|
|
/*--
|
|
These are the main data structures for
|
|
the Burrows-Wheeler transform.
|
|
--*/
|
|
|
|
/*--
|
|
Pointers to compression and decompression
|
|
structures. Set by
|
|
allocateCompressStructures and
|
|
setDecompressStructureSizes
|
|
|
|
The structures are always set to be suitable
|
|
for a block of size 100000 * blockSize100k.
|
|
--*/
|
|
UChar *block; /*-- compress --*/
|
|
UInt16 *quadrant; /*-- compress --*/
|
|
Int32 *zptr; /*-- compress --*/
|
|
UInt16 *szptr; /*-- overlays zptr ---*/
|
|
Int32 *ftab; /*-- compress --*/
|
|
|
|
UInt16 *ll16; /*-- small decompress --*/
|
|
UChar *ll4; /*-- small decompress --*/
|
|
|
|
Int32 *tt; /*-- fast decompress --*/
|
|
UChar *ll8; /*-- fast decompress --*/
|
|
|
|
|
|
/*--
|
|
freq table collected to save a pass over the data
|
|
during decompression.
|
|
--*/
|
|
Int32 unzftab[256];
|
|
|
|
|
|
/*--
|
|
index of the last char in the block, so
|
|
the block size == last + 1.
|
|
--*/
|
|
Int32 last;
|
|
|
|
|
|
/*--
|
|
index in zptr[] of original string after sorting.
|
|
--*/
|
|
Int32 origPtr;
|
|
|
|
|
|
/*--
|
|
always: in the range 0 .. 9.
|
|
The current block size is 100000 * this number.
|
|
--*/
|
|
Int32 blockSize100k;
|
|
|
|
|
|
/*--
|
|
Used when sorting. If too many long comparisons
|
|
happen, we stop sorting, randomise the block
|
|
slightly, and try again.
|
|
--*/
|
|
|
|
Int32 workFactor;
|
|
Int32 workDone;
|
|
Int32 workLimit;
|
|
Bool blockRandomised;
|
|
Bool firstAttempt;
|
|
Int32 nBlocksRandomised;
|
|
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Data decls for the back end ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
#define MAX_ALPHA_SIZE 258
|
|
#define MAX_CODE_LEN 23
|
|
|
|
#define RUNA 0
|
|
#define RUNB 1
|
|
|
|
#define N_GROUPS 6
|
|
#define G_SIZE 50
|
|
#define N_ITERS 4
|
|
|
|
#define MAX_SELECTORS (2 + (900000 / G_SIZE))
|
|
|
|
Bool inUse[256];
|
|
Int32 nInUse;
|
|
|
|
UChar seqToUnseq[256];
|
|
UChar unseqToSeq[256];
|
|
|
|
UChar selector [MAX_SELECTORS];
|
|
UChar selectorMtf[MAX_SELECTORS];
|
|
|
|
Int32 nMTF;
|
|
|
|
Int32 mtfFreq[MAX_ALPHA_SIZE];
|
|
|
|
UChar len [N_GROUPS][MAX_ALPHA_SIZE];
|
|
|
|
/*-- decompress only --*/
|
|
Int32 limit [N_GROUPS][MAX_ALPHA_SIZE];
|
|
Int32 base [N_GROUPS][MAX_ALPHA_SIZE];
|
|
Int32 perm [N_GROUPS][MAX_ALPHA_SIZE];
|
|
Int32 minLens[N_GROUPS];
|
|
|
|
/*-- compress only --*/
|
|
Int32 code [N_GROUPS][MAX_ALPHA_SIZE];
|
|
Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- 32-bit CRC grunge ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*--
|
|
I think this is an implementation of the AUTODIN-II,
|
|
Ethernet & FDDI 32-bit CRC standard. Vaguely derived
|
|
from code by Rob Warnock, in Section 51 of the
|
|
comp.compression FAQ.
|
|
--*/
|
|
|
|
UInt32 crc32Table[256] = {
|
|
|
|
/*-- Ugly, innit? --*/
|
|
|
|
0x00000000UL, 0x04c11db7UL, 0x09823b6eUL, 0x0d4326d9UL,
|
|
0x130476dcUL, 0x17c56b6bUL, 0x1a864db2UL, 0x1e475005UL,
|
|
0x2608edb8UL, 0x22c9f00fUL, 0x2f8ad6d6UL, 0x2b4bcb61UL,
|
|
0x350c9b64UL, 0x31cd86d3UL, 0x3c8ea00aUL, 0x384fbdbdUL,
|
|
0x4c11db70UL, 0x48d0c6c7UL, 0x4593e01eUL, 0x4152fda9UL,
|
|
0x5f15adacUL, 0x5bd4b01bUL, 0x569796c2UL, 0x52568b75UL,
|
|
0x6a1936c8UL, 0x6ed82b7fUL, 0x639b0da6UL, 0x675a1011UL,
|
|
0x791d4014UL, 0x7ddc5da3UL, 0x709f7b7aUL, 0x745e66cdUL,
|
|
0x9823b6e0UL, 0x9ce2ab57UL, 0x91a18d8eUL, 0x95609039UL,
|
|
0x8b27c03cUL, 0x8fe6dd8bUL, 0x82a5fb52UL, 0x8664e6e5UL,
|
|
0xbe2b5b58UL, 0xbaea46efUL, 0xb7a96036UL, 0xb3687d81UL,
|
|
0xad2f2d84UL, 0xa9ee3033UL, 0xa4ad16eaUL, 0xa06c0b5dUL,
|
|
0xd4326d90UL, 0xd0f37027UL, 0xddb056feUL, 0xd9714b49UL,
|
|
0xc7361b4cUL, 0xc3f706fbUL, 0xceb42022UL, 0xca753d95UL,
|
|
0xf23a8028UL, 0xf6fb9d9fUL, 0xfbb8bb46UL, 0xff79a6f1UL,
|
|
0xe13ef6f4UL, 0xe5ffeb43UL, 0xe8bccd9aUL, 0xec7dd02dUL,
|
|
0x34867077UL, 0x30476dc0UL, 0x3d044b19UL, 0x39c556aeUL,
|
|
0x278206abUL, 0x23431b1cUL, 0x2e003dc5UL, 0x2ac12072UL,
|
|
0x128e9dcfUL, 0x164f8078UL, 0x1b0ca6a1UL, 0x1fcdbb16UL,
|
|
0x018aeb13UL, 0x054bf6a4UL, 0x0808d07dUL, 0x0cc9cdcaUL,
|
|
0x7897ab07UL, 0x7c56b6b0UL, 0x71159069UL, 0x75d48ddeUL,
|
|
0x6b93dddbUL, 0x6f52c06cUL, 0x6211e6b5UL, 0x66d0fb02UL,
|
|
0x5e9f46bfUL, 0x5a5e5b08UL, 0x571d7dd1UL, 0x53dc6066UL,
|
|
0x4d9b3063UL, 0x495a2dd4UL, 0x44190b0dUL, 0x40d816baUL,
|
|
0xaca5c697UL, 0xa864db20UL, 0xa527fdf9UL, 0xa1e6e04eUL,
|
|
0xbfa1b04bUL, 0xbb60adfcUL, 0xb6238b25UL, 0xb2e29692UL,
|
|
0x8aad2b2fUL, 0x8e6c3698UL, 0x832f1041UL, 0x87ee0df6UL,
|
|
0x99a95df3UL, 0x9d684044UL, 0x902b669dUL, 0x94ea7b2aUL,
|
|
0xe0b41de7UL, 0xe4750050UL, 0xe9362689UL, 0xedf73b3eUL,
|
|
0xf3b06b3bUL, 0xf771768cUL, 0xfa325055UL, 0xfef34de2UL,
|
|
0xc6bcf05fUL, 0xc27dede8UL, 0xcf3ecb31UL, 0xcbffd686UL,
|
|
0xd5b88683UL, 0xd1799b34UL, 0xdc3abdedUL, 0xd8fba05aUL,
|
|
0x690ce0eeUL, 0x6dcdfd59UL, 0x608edb80UL, 0x644fc637UL,
|
|
0x7a089632UL, 0x7ec98b85UL, 0x738aad5cUL, 0x774bb0ebUL,
|
|
0x4f040d56UL, 0x4bc510e1UL, 0x46863638UL, 0x42472b8fUL,
|
|
0x5c007b8aUL, 0x58c1663dUL, 0x558240e4UL, 0x51435d53UL,
|
|
0x251d3b9eUL, 0x21dc2629UL, 0x2c9f00f0UL, 0x285e1d47UL,
|
|
0x36194d42UL, 0x32d850f5UL, 0x3f9b762cUL, 0x3b5a6b9bUL,
|
|
0x0315d626UL, 0x07d4cb91UL, 0x0a97ed48UL, 0x0e56f0ffUL,
|
|
0x1011a0faUL, 0x14d0bd4dUL, 0x19939b94UL, 0x1d528623UL,
|
|
0xf12f560eUL, 0xf5ee4bb9UL, 0xf8ad6d60UL, 0xfc6c70d7UL,
|
|
0xe22b20d2UL, 0xe6ea3d65UL, 0xeba91bbcUL, 0xef68060bUL,
|
|
0xd727bbb6UL, 0xd3e6a601UL, 0xdea580d8UL, 0xda649d6fUL,
|
|
0xc423cd6aUL, 0xc0e2d0ddUL, 0xcda1f604UL, 0xc960ebb3UL,
|
|
0xbd3e8d7eUL, 0xb9ff90c9UL, 0xb4bcb610UL, 0xb07daba7UL,
|
|
0xae3afba2UL, 0xaafbe615UL, 0xa7b8c0ccUL, 0xa379dd7bUL,
|
|
0x9b3660c6UL, 0x9ff77d71UL, 0x92b45ba8UL, 0x9675461fUL,
|
|
0x8832161aUL, 0x8cf30badUL, 0x81b02d74UL, 0x857130c3UL,
|
|
0x5d8a9099UL, 0x594b8d2eUL, 0x5408abf7UL, 0x50c9b640UL,
|
|
0x4e8ee645UL, 0x4a4ffbf2UL, 0x470cdd2bUL, 0x43cdc09cUL,
|
|
0x7b827d21UL, 0x7f436096UL, 0x7200464fUL, 0x76c15bf8UL,
|
|
0x68860bfdUL, 0x6c47164aUL, 0x61043093UL, 0x65c52d24UL,
|
|
0x119b4be9UL, 0x155a565eUL, 0x18197087UL, 0x1cd86d30UL,
|
|
0x029f3d35UL, 0x065e2082UL, 0x0b1d065bUL, 0x0fdc1becUL,
|
|
0x3793a651UL, 0x3352bbe6UL, 0x3e119d3fUL, 0x3ad08088UL,
|
|
0x2497d08dUL, 0x2056cd3aUL, 0x2d15ebe3UL, 0x29d4f654UL,
|
|
0xc5a92679UL, 0xc1683bceUL, 0xcc2b1d17UL, 0xc8ea00a0UL,
|
|
0xd6ad50a5UL, 0xd26c4d12UL, 0xdf2f6bcbUL, 0xdbee767cUL,
|
|
0xe3a1cbc1UL, 0xe760d676UL, 0xea23f0afUL, 0xeee2ed18UL,
|
|
0xf0a5bd1dUL, 0xf464a0aaUL, 0xf9278673UL, 0xfde69bc4UL,
|
|
0x89b8fd09UL, 0x8d79e0beUL, 0x803ac667UL, 0x84fbdbd0UL,
|
|
0x9abc8bd5UL, 0x9e7d9662UL, 0x933eb0bbUL, 0x97ffad0cUL,
|
|
0xafb010b1UL, 0xab710d06UL, 0xa6322bdfUL, 0xa2f33668UL,
|
|
0xbcb4666dUL, 0xb8757bdaUL, 0xb5365d03UL, 0xb1f740b4UL
|
|
};
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void initialiseCRC ( void )
|
|
{
|
|
globalCrc = 0xffffffffUL;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
UInt32 getFinalCRC ( void )
|
|
{
|
|
return ~globalCrc;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
UInt32 getGlobalCRC ( void )
|
|
{
|
|
return globalCrc;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Overwrite the raw running CRC with newCrc.
--*/
void setGlobalCRC ( UInt32 newCrc )
{
   globalCrc = newCrc;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Fold one byte into a running CRC.

      crcVar  an lvalue holding the running 32-bit CRC;
              it is updated in place.
      cha     the next data byte; only its low 8 bits
              (after conversion to UChar) are used.

   The top byte of the CRC is xor-ed with the data byte
   to select an entry of crc32Table, which is then xor-ed
   into the CRC shifted left by one byte.

   Both arguments are fully parenthesised so that an
   expression argument (e.g. `c ? a : b') is converted to
   UChar as a whole; without that the cast would bind to
   its first operand only and the table index could fall
   outside 0 .. 255.  Note crcVar is evaluated more than
   once, so it must not have side effects.
--*/
#define UPDATE_CRC(crcVar,cha)                        \
{                                                     \
   (crcVar) = ((crcVar) << 8) ^                       \
              crc32Table[((crcVar) >> 24) ^           \
                         ((UChar)(cha))];             \
}
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Bit stream I/O ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
|
|
UInt32 bsBuff;
|
|
Int32 bsLive;
|
|
FILE* bsStream;
|
|
Bool bsWriting;
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Attach the bit-stream layer to file f.

      f    the stdio stream to read from or write to
      wr   True if the stream will be written, False
           if it will be read

   Panics if a stream is already attached (bsStream is
   non-NULL).  The bit buffer and the bytesIn / bytesOut
   counters are all reset to zero.
--*/
void bsSetStream ( FILE* f, Bool wr )
{
   if (bsStream != NULL) panic ( "bsSetStream" );

   bsStream  = f;
   bsWriting = wr;

   bsBuff    = 0;
   bsLive    = 0;

   bytesIn   = 0;
   bytesOut  = 0;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void bsFinishedWithStream ( void )
|
|
{
|
|
if (bsWriting)
|
|
while (bsLive > 0) {
|
|
fputc ( (UChar)(bsBuff >> 24), bsStream );
|
|
bsBuff <<= 8;
|
|
bsLive -= 8;
|
|
bytesOut++;
|
|
}
|
|
bsStream = NULL;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Make sure at least nz bits are available in bsBuff
   for reading.  Whole bytes are fetched from bsStream
   and shifted in at the low end until bsLive >= nz.
   Hitting end-of-file calls compressedStreamEOF(),
   which is declared NORETURN.
--*/
#define bsNEEDR(nz)                                   \
{                                                     \
   while (bsLive < nz) {                              \
      Int32 zzi = fgetc ( bsStream );                 \
      if (zzi == EOF) compressedStreamEOF();          \
      bsBuff = (bsBuff << 8) | (zzi & 0xffL);         \
      bsLive += 8;                                    \
   }                                                  \
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Make room in bsBuff before a write: every complete
   byte currently held is written to bsStream from the
   top of the buffer, leaving fewer than 8 live bits.

   The argument nz is not used by the expansion; the
   buffer is always drained down to under one byte.
--*/
#define bsNEEDW(nz)                                   \
{                                                     \
   while (bsLive >= 8) {                              \
      fputc ( (UChar)(bsBuff >> 24),                  \
              bsStream );                             \
      bsBuff <<= 8;                                   \
      bsLive -= 8;                                    \
      bytesOut++;                                     \
   }                                                  \
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Read a single bit from the input stream into the
   lvalue vz (result is 0 or 1), refilling bsBuff
   first if it is empty.
--*/
#define bsR1(vz)                                      \
{                                                     \
   bsNEEDR(1);                                        \
   vz = (bsBuff >> (bsLive-1)) & 1;                   \
   bsLive--;                                          \
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Read the next n bits from the input stream and
   return them as an unsigned value, first bit read
   in the most significant position.

   NOTE(review): bsBuff is 32 bits wide and is refilled
   a byte at a time, so this looks safe only for n up to
   about 25; the mask (1 << n) also requires n < 31.
   Confirm against callers.
--*/
INLINE UInt32 bsR ( Int32 n )
{
   UInt32 mask = (1 << n) - 1;

   bsNEEDR ( n );
   bsLive -= n;
   return (bsBuff >> bsLive) & mask;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Append the low n bits of v to the output stream.

   Complete bytes already buffered are flushed first,
   then v is or-ed into bsBuff just below the bits
   that are still live.  v must not have any bits set
   above bit n-1, since it is not masked here.

   NOTE(review): with fewer than 8 bits live after the
   flush, n apparently needs to be in 1 .. 24 or so for
   the shift count to stay in range -- confirm.
--*/
INLINE void bsW ( Int32 n, UInt32 v )
{
   Int32 shift;

   bsNEEDW ( n );
   shift   = 32 - bsLive - n;
   bsBuff |= (v << shift);
   bsLive += n;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
UChar bsGetUChar ( void )
|
|
{
|
|
return (UChar)bsR(8);
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Write the byte c to the output stream as 8 bits.
--*/
void bsPutUChar ( UChar c )
{
   UInt32 bits = (UInt32)c;
   bsW ( 8, bits );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
Int32 bsGetUInt32 ( void )
|
|
{
|
|
UInt32 u;
|
|
u = 0;
|
|
u = (u << 8) | bsR(8);
|
|
u = (u << 8) | bsR(8);
|
|
u = (u << 8) | bsR(8);
|
|
u = (u << 8) | bsR(8);
|
|
return u;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Read a numBits-wide unsigned field from the input
   stream.  Thin wrapper around bsR.
--*/
UInt32 bsGetIntVS ( UInt32 numBits )
{
   UInt32 field = bsR(numBits);
   return (UInt32)field;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
UInt32 bsGetInt32 ( void )
|
|
{
|
|
return (Int32)bsGetUInt32();
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Write u to the output stream as four bytes, most
   significant byte first.
--*/
void bsPutUInt32 ( UInt32 u )
{
   Int32 shift;

   for (shift = 24; shift >= 0; shift -= 8)
      bsW ( 8, (u >> shift) & 0xffL );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Write the signed value c as four bytes, most
   significant first, by reinterpreting it as unsigned.
--*/
void bsPutInt32 ( Int32 c )
{
   UInt32 asUnsigned = (UInt32)c;
   bsPutUInt32 ( asUnsigned );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Write the low numBits bits of c to the output
   stream.  Thin wrapper around bsW, so c must have no
   bits set above bit numBits-1.
--*/
void bsPutIntVS ( Int32 numBits, UInt32 c )
{
   bsW ( numBits, c );
}
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Huffman coding low-level stuff ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*--
   A Huffman tree node's `weight' word packs two fields:
      bits 8..31  the accumulated symbol frequency
      bits 0..7   the depth of the subtree below the node
   WEIGHTOF and DEPTHOF extract the (unshifted) fields.
--*/
#define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
#define DEPTHOF(zz1)   ((zz1) & 0x000000ff)

/*-- Larger of two values; each argument may be evaluated twice. --*/
#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))

/*--
   Weight word for the parent of two nodes: frequencies
   are summed and the depth is one more than the deeper
   child.  The whole expansion is parenthesised so the
   macro can be used safely inside a larger expression.
--*/
#define ADDWEIGHTS(zw1,zw2)                           \
   ((WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                   \
    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))))
|
|
|
|
/*--
   Sift the entry at heap position z upwards until its
   parent's weight is no longer greater than its own.

   Expects heap[], weight[] in scope at the point of use.
   There is no explicit test for reaching the root: the
   loop stops at position 1 only because heap[0] refers
   to a node whose weight compares no greater than any
   real node (see the sentinel set-up in
   hbMakeCodeLengths).
--*/
#define UPHEAP(z)                                     \
{                                                     \
   Int32 zz  = (z);                                   \
   Int32 tmp = heap[zz];                              \
   for ( ; weight[tmp] < weight[heap[zz >> 1]];       \
           zz >>= 1)                                  \
      heap[zz] = heap[zz >> 1];                       \
   heap[zz] = tmp;                                    \
}
|
|
|
|
/*--
   Sift the entry at heap position z downwards: while
   it has a child of smaller-or-equal weight, the
   lighter child is moved up and the search continues
   from that child's slot.

   Expects heap[], weight[] and nHeap (the current
   number of heap entries) in scope at the point of use.
--*/
#define DOWNHEAP(z)                                   \
{                                                     \
   Int32 zz  = (z);                                   \
   Int32 tmp = heap[zz];                              \
   Int32 yy;                                          \
   for (yy = zz << 1; yy <= nHeap; yy = zz << 1) {    \
      if (yy < nHeap &&                               \
          weight[heap[yy+1]] < weight[heap[yy]])      \
         yy++;                                        \
      if (weight[tmp] < weight[heap[yy]]) break;      \
      heap[zz] = heap[yy];                            \
      zz = yy;                                        \
   }                                                  \
   heap[zz] = tmp;                                    \
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void hbMakeCodeLengths ( UChar *len,
|
|
Int32 *freq,
|
|
Int32 alphaSize,
|
|
Int32 maxLen )
|
|
{
|
|
/*--
|
|
Nodes and heap entries run from 1. Entry 0
|
|
for both the heap and nodes is a sentinel.
|
|
--*/
|
|
Int32 nNodes, nHeap, n1, n2, i, j, k;
|
|
Bool tooLong;
|
|
|
|
Int32 heap [ MAX_ALPHA_SIZE + 2 ];
|
|
Int32 weight [ MAX_ALPHA_SIZE * 2 ];
|
|
Int32 parent [ MAX_ALPHA_SIZE * 2 ];
|
|
|
|
for (i = 0; i < alphaSize; i++)
|
|
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
|
|
|
|
while (True) {
|
|
|
|
nNodes = alphaSize;
|
|
nHeap = 0;
|
|
|
|
heap[0] = 0;
|
|
weight[0] = 0;
|
|
parent[0] = -2;
|
|
|
|
for (i = 1; i <= alphaSize; i++) {
|
|
parent[i] = -1;
|
|
nHeap++;
|
|
heap[nHeap] = i;
|
|
UPHEAP(nHeap);
|
|
}
|
|
if (!(nHeap < (MAX_ALPHA_SIZE+2)))
|
|
panic ( "hbMakeCodeLengths(1)" );
|
|
|
|
while (nHeap > 1) {
|
|
n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
|
|
n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
|
|
nNodes++;
|
|
parent[n1] = parent[n2] = nNodes;
|
|
weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
|
|
parent[nNodes] = -1;
|
|
nHeap++;
|
|
heap[nHeap] = nNodes;
|
|
UPHEAP(nHeap);
|
|
}
|
|
if (!(nNodes < (MAX_ALPHA_SIZE * 2)))
|
|
panic ( "hbMakeCodeLengths(2)" );
|
|
|
|
tooLong = False;
|
|
for (i = 1; i <= alphaSize; i++) {
|
|
j = 0;
|
|
k = i;
|
|
while (parent[k] >= 0) { k = parent[k]; j++; }
|
|
len[i-1] = j;
|
|
if (j > maxLen) tooLong = True;
|
|
}
|
|
|
|
if (! tooLong) break;
|
|
|
|
for (i = 1; i < alphaSize; i++) {
|
|
j = weight[i] >> 8;
|
|
j = 1 + (j / 2);
|
|
weight[i] = j << 8;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void hbAssignCodes ( Int32 *code,
|
|
UChar *length,
|
|
Int32 minLen,
|
|
Int32 maxLen,
|
|
Int32 alphaSize )
|
|
{
|
|
Int32 n, vec, i;
|
|
|
|
vec = 0;
|
|
for (n = minLen; n <= maxLen; n++) {
|
|
for (i = 0; i < alphaSize; i++)
|
|
if (length[i] == n) { code[i] = vec; vec++; };
|
|
vec <<= 1;
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void hbCreateDecodeTables ( Int32 *limit,
|
|
Int32 *base,
|
|
Int32 *perm,
|
|
UChar *length,
|
|
Int32 minLen,
|
|
Int32 maxLen,
|
|
Int32 alphaSize )
|
|
{
|
|
Int32 pp, i, j, vec;
|
|
|
|
pp = 0;
|
|
for (i = minLen; i <= maxLen; i++)
|
|
for (j = 0; j < alphaSize; j++)
|
|
if (length[j] == i) { perm[pp] = j; pp++; };
|
|
|
|
for (i = 0; i < MAX_CODE_LEN; i++) base[i] = 0;
|
|
for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
|
|
|
|
for (i = 1; i < MAX_CODE_LEN; i++) base[i] += base[i-1];
|
|
|
|
for (i = 0; i < MAX_CODE_LEN; i++) limit[i] = 0;
|
|
vec = 0;
|
|
|
|
for (i = minLen; i <= maxLen; i++) {
|
|
vec += (base[i+1] - base[i]);
|
|
limit[i] = vec-1;
|
|
vec <<= 1;
|
|
}
|
|
for (i = minLen + 1; i <= maxLen; i++)
|
|
base[i] = ((limit[i-1] + 1) << 1) - base[i];
|
|
}
|
|
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Undoing the reversible transformation ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   In small-decompress mode a 20-bit value per position
   is kept split across two arrays: the low 16 bits in
   ll16[i] and the upper 4 bits in a nibble of ll4[],
   two positions per byte (even i in the low nibble,
   odd i in the high nibble).
--*/

/*-- Store the 4-bit value n in the nibble for position i. --*/
#define SET_LL4(i,n)                                          \
   { if (((i) & 0x1) == 0)                                    \
        ll4[(i) >> 1] = (ll4[(i) >> 1] & 0xf0) | (n); else    \
        ll4[(i) >> 1] = (ll4[(i) >> 1] & 0x0f) | ((n) << 4);  \
   }

/*-- Fetch the 4-bit value stored for position i. --*/
#define GET_LL4(i)                                            \
    (((UInt32)(ll4[(i) >> 1])) >> (((i) << 2) & 0x4) & 0xF)

/*--
   Store n at position i: low 16 bits to ll16, the bits
   from 16 upwards to the ll4 nibble.  n is used
   unparenthesised, so pass a simple expression.
--*/
#define SET_LL(i,n)                       \
   { ll16[i] = (UInt16)(n & 0x0000ffff);  \
     SET_LL4(i, n >> 16);                 \
   }

/*-- Reassemble the value stored at position i. --*/
#define GET_LL(i) \
   (((UInt32)ll16[i]) | (GET_LL4(i) << 16))
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Manage memory for compression/decompression.
|
|
When compressing, a single block size applies to
|
|
all files processed, and that's set when the
|
|
program starts. But when decompressing, each file
|
|
processed could have been compressed with a
|
|
different block size, so we may have to free
|
|
and reallocate on a per-file basis.
|
|
|
|
A call with argument of zero means
|
|
`free up everything.' And a value of zero for
|
|
blockSize100k means no memory is currently allocated.
|
|
--*/
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void allocateCompressStructures ( void )
|
|
{
|
|
Int32 n = 100000 * blockSize100k;
|
|
block = malloc ( (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) );
|
|
quadrant = malloc ( (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) );
|
|
zptr = malloc ( n * sizeof(Int32) );
|
|
ftab = malloc ( 65537 * sizeof(Int32) );
|
|
|
|
if (block == NULL || quadrant == NULL ||
|
|
zptr == NULL || ftab == NULL) {
|
|
Int32 totalDraw
|
|
= (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) +
|
|
(n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) +
|
|
n * sizeof(Int32) +
|
|
65537 * sizeof(Int32);
|
|
|
|
compressOutOfMemory ( totalDraw, n );
|
|
}
|
|
|
|
/*--
|
|
Since we want valid indexes for block of
|
|
-1 to n + NUM_OVERSHOOT_BYTES - 1
|
|
inclusive.
|
|
--*/
|
|
block++;
|
|
|
|
/*--
|
|
The back end needs a place to store the MTF values
|
|
whilst it calculates the coding tables. We could
|
|
put them in the zptr array. However, these values
|
|
will fit in a short, so we overlay szptr at the
|
|
start of zptr, in the hope of reducing the number
|
|
of cache misses induced by the multiple traversals
|
|
of the MTF values when calculating coding tables.
|
|
Seems to improve compression speed by about 1%.
|
|
--*/
|
|
szptr = (UInt16*)zptr;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   (Re)size the decompression arrays for a block size
   of 100000 * newSize100k bytes.

      newSize100k   0 .. 9; zero means `free everything'

   Nothing is done if the requested size equals the
   current blockSize100k.  Otherwise the existing arrays
   are released and, unless the new size is zero, fresh
   ones are allocated: ll16 and ll4 when smallMode is
   set, ll8 and tt otherwise.

   Panics if either the requested or the current size is
   outside 0 .. 9.  On allocation failure
   uncompressOutOfMemory is called (declared NORETURN).
--*/
void setDecompressStructureSizes ( Int32 newSize100k )
{
   Int32 n;

   if (! (0 <= newSize100k   && newSize100k   <= 9 &&
          0 <= blockSize100k && blockSize100k <= 9))
      panic ( "setDecompressStructureSizes" );

   if (newSize100k == blockSize100k) return;

   blockSize100k = newSize100k;

   /*--
      Release whatever is currently held and forget the
      old addresses.  Resetting to NULL matters: this
      function can return below without reallocating
      (size zero), and only one pair of arrays is
      reallocated per mode, so a stale pointer left here
      would be freed a second time on the next resize.
      free(NULL) is harmless, so no guards are needed.
   --*/
   free ( ll16 );  ll16 = NULL;
   free ( ll4  );  ll4  = NULL;
   free ( ll8  );  ll8  = NULL;
   free ( tt   );  tt   = NULL;

   if (newSize100k == 0) return;

   n = 100000 * newSize100k;

   if (smallMode) {

      /*-- 16 bits plus one nibble per position --*/
      ll16 = malloc ( n * sizeof(UInt16) );
      ll4  = malloc ( ((n+1) >> 1) * sizeof(UChar) );

      if (ll4 == NULL || ll16 == NULL) {
         Int32 totalDraw
            = n * sizeof(Int16) + ((n+1) >> 1) * sizeof(UChar);
         uncompressOutOfMemory ( totalDraw, n );
      }

   } else {

      /*-- one byte plus one 32-bit word per position --*/
      ll8 = malloc ( n * sizeof(UChar) );
      tt  = malloc ( n * sizeof(Int32) );

      if (ll8 == NULL || tt == NULL) {
         Int32 totalDraw
            = n * sizeof(UChar) + n * sizeof(UInt32);
         uncompressOutOfMemory ( totalDraw, n );
      }

   }
}
|
|
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- The new back end ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
void makeMaps ( void )
|
|
{
|
|
Int32 i;
|
|
nInUse = 0;
|
|
for (i = 0; i < 256; i++)
|
|
if (inUse[i]) {
|
|
seqToUnseq[nInUse] = i;
|
|
unseqToSeq[i] = nInUse;
|
|
nInUse++;
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void generateMTFValues ( void )
|
|
{
|
|
UChar yy[256];
|
|
Int32 i, j;
|
|
UChar tmp;
|
|
UChar tmp2;
|
|
Int32 zPend;
|
|
Int32 wr;
|
|
Int32 EOB;
|
|
|
|
makeMaps();
|
|
EOB = nInUse+1;
|
|
|
|
for (i = 0; i <= EOB; i++) mtfFreq[i] = 0;
|
|
|
|
wr = 0;
|
|
zPend = 0;
|
|
for (i = 0; i < nInUse; i++) yy[i] = (UChar) i;
|
|
|
|
|
|
for (i = 0; i <= last; i++) {
|
|
UChar ll_i;
|
|
|
|
#if DEBUG
|
|
assert (wr <= i);
|
|
#endif
|
|
|
|
ll_i = unseqToSeq[block[zptr[i] - 1]];
|
|
#if DEBUG
|
|
assert (ll_i < nInUse);
|
|
#endif
|
|
|
|
j = 0;
|
|
tmp = yy[j];
|
|
while ( ll_i != tmp ) {
|
|
j++;
|
|
tmp2 = tmp;
|
|
tmp = yy[j];
|
|
yy[j] = tmp2;
|
|
};
|
|
yy[0] = tmp;
|
|
|
|
if (j == 0) {
|
|
zPend++;
|
|
} else {
|
|
if (zPend > 0) {
|
|
zPend--;
|
|
while (True) {
|
|
switch (zPend % 2) {
|
|
case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
|
|
case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
|
|
};
|
|
if (zPend < 2) break;
|
|
zPend = (zPend - 2) / 2;
|
|
};
|
|
zPend = 0;
|
|
}
|
|
szptr[wr] = j+1; wr++; mtfFreq[j+1]++;
|
|
}
|
|
}
|
|
|
|
if (zPend > 0) {
|
|
zPend--;
|
|
while (True) {
|
|
switch (zPend % 2) {
|
|
case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
|
|
case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
|
|
};
|
|
if (zPend < 2) break;
|
|
zPend = (zPend - 2) / 2;
|
|
};
|
|
}
|
|
|
|
szptr[wr] = EOB; wr++; mtfFreq[EOB]++;
|
|
|
|
nMTF = wr;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
#define LESSER_ICOST 0
|
|
#define GREATER_ICOST 15
|
|
|
|
void sendMTFValues ( void )
|
|
{
|
|
Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
|
|
Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
|
|
Int32 nGroups, nBytes;
|
|
|
|
/*--
|
|
UChar len [N_GROUPS][MAX_ALPHA_SIZE];
|
|
is a global since the decoder also needs it.
|
|
|
|
Int32 code[N_GROUPS][MAX_ALPHA_SIZE];
|
|
Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
|
|
are also globals only used in this proc.
|
|
Made global to keep stack frame size small.
|
|
--*/
|
|
|
|
|
|
UInt16 cost[N_GROUPS];
|
|
Int32 fave[N_GROUPS];
|
|
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr,
|
|
" %d in block, %d after MTF & 1-2 coding, %d+2 syms in use\n",
|
|
last+1, nMTF, nInUse );
|
|
|
|
alphaSize = nInUse+2;
|
|
for (t = 0; t < N_GROUPS; t++)
|
|
for (v = 0; v < alphaSize; v++)
|
|
len[t][v] = GREATER_ICOST;
|
|
|
|
/*--- Decide how many coding tables to use ---*/
|
|
if (nMTF <= 0) panic ( "sendMTFValues(0)" );
|
|
if (nMTF < 200) nGroups = 2; else
|
|
if (nMTF < 800) nGroups = 4; else
|
|
nGroups = 6;
|
|
|
|
/*--- Generate an initial set of coding tables ---*/
|
|
{
|
|
Int32 nPart, remF, tFreq, aFreq;
|
|
|
|
nPart = nGroups;
|
|
remF = nMTF;
|
|
gs = 0;
|
|
while (nPart > 0) {
|
|
tFreq = remF / nPart;
|
|
ge = gs-1;
|
|
aFreq = 0;
|
|
while (aFreq < tFreq && ge < alphaSize-1) {
|
|
ge++;
|
|
aFreq += mtfFreq[ge];
|
|
}
|
|
|
|
if (ge > gs
|
|
&& nPart != nGroups && nPart != 1
|
|
&& ((nGroups-nPart) % 2 == 1)) {
|
|
aFreq -= mtfFreq[ge];
|
|
ge--;
|
|
}
|
|
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr,
|
|
" initial group %d, [%d .. %d], has %d syms (%4.1f%%)\n",
|
|
nPart, gs, ge, aFreq,
|
|
(100.0 * (float)aFreq) / (float)nMTF );
|
|
|
|
for (v = 0; v < alphaSize; v++)
|
|
if (v >= gs && v <= ge)
|
|
len[nPart-1][v] = LESSER_ICOST; else
|
|
len[nPart-1][v] = GREATER_ICOST;
|
|
|
|
nPart--;
|
|
gs = ge+1;
|
|
remF -= aFreq;
|
|
}
|
|
}
|
|
|
|
/*---
|
|
Iterate up to N_ITERS times to improve the tables.
|
|
---*/
|
|
for (iter = 0; iter < N_ITERS; iter++) {
|
|
|
|
for (t = 0; t < nGroups; t++) fave[t] = 0;
|
|
|
|
for (t = 0; t < nGroups; t++)
|
|
for (v = 0; v < alphaSize; v++)
|
|
rfreq[t][v] = 0;
|
|
|
|
nSelectors = 0;
|
|
totc = 0;
|
|
gs = 0;
|
|
while (True) {
|
|
|
|
/*--- Set group start & end marks. --*/
|
|
if (gs >= nMTF) break;
|
|
ge = gs + G_SIZE - 1;
|
|
if (ge >= nMTF) ge = nMTF-1;
|
|
|
|
/*--
|
|
Calculate the cost of this group as coded
|
|
by each of the coding tables.
|
|
--*/
|
|
for (t = 0; t < nGroups; t++) cost[t] = 0;
|
|
|
|
if (nGroups == 6) {
|
|
register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
|
|
cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
|
|
for (i = gs; i <= ge; i++) {
|
|
UInt16 icv = szptr[i];
|
|
cost0 += len[0][icv];
|
|
cost1 += len[1][icv];
|
|
cost2 += len[2][icv];
|
|
cost3 += len[3][icv];
|
|
cost4 += len[4][icv];
|
|
cost5 += len[5][icv];
|
|
}
|
|
cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
|
|
cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
|
|
} else {
|
|
for (i = gs; i <= ge; i++) {
|
|
UInt16 icv = szptr[i];
|
|
for (t = 0; t < nGroups; t++) cost[t] += len[t][icv];
|
|
}
|
|
}
|
|
|
|
/*--
|
|
Find the coding table which is best for this group,
|
|
and record its identity in the selector table.
|
|
--*/
|
|
bc = 999999999; bt = -1;
|
|
for (t = 0; t < nGroups; t++)
|
|
if (cost[t] < bc) { bc = cost[t]; bt = t; };
|
|
totc += bc;
|
|
fave[bt]++;
|
|
selector[nSelectors] = bt;
|
|
nSelectors++;
|
|
|
|
/*--
|
|
Increment the symbol frequencies for the selected table.
|
|
--*/
|
|
for (i = gs; i <= ge; i++)
|
|
rfreq[bt][ szptr[i] ]++;
|
|
|
|
gs = ge+1;
|
|
}
|
|
if (verbosity >= 3) {
|
|
fprintf ( stderr,
|
|
" pass %d: size is %d, grp uses are ",
|
|
iter+1, totc/8 );
|
|
for (t = 0; t < nGroups; t++)
|
|
fprintf ( stderr, "%d ", fave[t] );
|
|
fprintf ( stderr, "\n" );
|
|
}
|
|
|
|
/*--
|
|
Recompute the tables based on the accumulated frequencies.
|
|
--*/
|
|
for (t = 0; t < nGroups; t++)
|
|
hbMakeCodeLengths ( &len[t][0], &rfreq[t][0], alphaSize, 20 );
|
|
}
|
|
|
|
|
|
if (!(nGroups < 8)) panic ( "sendMTFValues(1)" );
|
|
if (!(nSelectors < 32768 &&
|
|
nSelectors <= (2 + (900000 / G_SIZE))))
|
|
panic ( "sendMTFValues(2)" );
|
|
|
|
|
|
/*--- Compute MTF values for the selectors. ---*/
|
|
{
|
|
UChar pos[N_GROUPS], ll_i, tmp2, tmp;
|
|
for (i = 0; i < nGroups; i++) pos[i] = i;
|
|
for (i = 0; i < nSelectors; i++) {
|
|
ll_i = selector[i];
|
|
j = 0;
|
|
tmp = pos[j];
|
|
while ( ll_i != tmp ) {
|
|
j++;
|
|
tmp2 = tmp;
|
|
tmp = pos[j];
|
|
pos[j] = tmp2;
|
|
};
|
|
pos[0] = tmp;
|
|
selectorMtf[i] = j;
|
|
}
|
|
};
|
|
|
|
/*--- Assign actual codes for the tables. --*/
|
|
for (t = 0; t < nGroups; t++) {
|
|
minLen = 32;
|
|
maxLen = 0;
|
|
for (i = 0; i < alphaSize; i++) {
|
|
if (len[t][i] > maxLen) maxLen = len[t][i];
|
|
if (len[t][i] < minLen) minLen = len[t][i];
|
|
}
|
|
if (maxLen > 20) panic ( "sendMTFValues(3)" );
|
|
if (minLen < 1) panic ( "sendMTFValues(4)" );
|
|
hbAssignCodes ( &code[t][0], &len[t][0],
|
|
minLen, maxLen, alphaSize );
|
|
}
|
|
|
|
/*--- Transmit the mapping table. ---*/
|
|
{
|
|
Bool inUse16[16];
|
|
for (i = 0; i < 16; i++) {
|
|
inUse16[i] = False;
|
|
for (j = 0; j < 16; j++)
|
|
if (inUse[i * 16 + j]) inUse16[i] = True;
|
|
}
|
|
|
|
nBytes = bytesOut;
|
|
for (i = 0; i < 16; i++)
|
|
if (inUse16[i]) bsW(1,1); else bsW(1,0);
|
|
|
|
for (i = 0; i < 16; i++)
|
|
if (inUse16[i])
|
|
for (j = 0; j < 16; j++)
|
|
if (inUse[i * 16 + j]) bsW(1,1); else bsW(1,0);
|
|
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, " bytes: mapping %d, ", bytesOut-nBytes );
|
|
}
|
|
|
|
/*--- Now the selectors. ---*/
|
|
nBytes = bytesOut;
|
|
bsW ( 3, nGroups );
|
|
bsW ( 15, nSelectors );
|
|
for (i = 0; i < nSelectors; i++) {
|
|
for (j = 0; j < selectorMtf[i]; j++) bsW(1,1);
|
|
bsW(1,0);
|
|
}
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, "selectors %d, ", bytesOut-nBytes );
|
|
|
|
/*--- Now the coding tables. ---*/
|
|
nBytes = bytesOut;
|
|
|
|
for (t = 0; t < nGroups; t++) {
|
|
Int32 curr = len[t][0];
|
|
bsW ( 5, curr );
|
|
for (i = 0; i < alphaSize; i++) {
|
|
while (curr < len[t][i]) { bsW(2,2); curr++; /* 10 */ };
|
|
while (curr > len[t][i]) { bsW(2,3); curr--; /* 11 */ };
|
|
bsW ( 1, 0 );
|
|
}
|
|
}
|
|
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, "code lengths %d, ", bytesOut-nBytes );
|
|
|
|
/*--- And finally, the block data proper ---*/
|
|
nBytes = bytesOut;
|
|
selCtr = 0;
|
|
gs = 0;
|
|
while (True) {
|
|
if (gs >= nMTF) break;
|
|
ge = gs + G_SIZE - 1;
|
|
if (ge >= nMTF) ge = nMTF-1;
|
|
for (i = gs; i <= ge; i++) {
|
|
#if DEBUG
|
|
assert (selector[selCtr] < nGroups);
|
|
#endif
|
|
bsW ( len [selector[selCtr]] [szptr[i]],
|
|
code [selector[selCtr]] [szptr[i]] );
|
|
}
|
|
|
|
gs = ge+1;
|
|
selCtr++;
|
|
}
|
|
if (!(selCtr == nSelectors)) panic ( "sendMTFValues(5)" );
|
|
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, "codes %d\n", bytesOut-nBytes );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void moveToFrontCodeAndSend ( void )
|
|
{
|
|
bsPutIntVS ( 24, origPtr );
|
|
generateMTFValues();
|
|
sendMTFValues();
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void recvDecodingTables ( void )
|
|
{
|
|
Int32 i, j, t, nGroups, nSelectors, alphaSize;
|
|
Int32 minLen, maxLen;
|
|
Bool inUse16[16];
|
|
|
|
/*--- Receive the mapping table ---*/
|
|
for (i = 0; i < 16; i++)
|
|
if (bsR(1) == 1)
|
|
inUse16[i] = True; else
|
|
inUse16[i] = False;
|
|
|
|
for (i = 0; i < 256; i++) inUse[i] = False;
|
|
|
|
for (i = 0; i < 16; i++)
|
|
if (inUse16[i])
|
|
for (j = 0; j < 16; j++)
|
|
if (bsR(1) == 1) inUse[i * 16 + j] = True;
|
|
|
|
makeMaps();
|
|
alphaSize = nInUse+2;
|
|
|
|
/*--- Now the selectors ---*/
|
|
nGroups = bsR ( 3 );
|
|
nSelectors = bsR ( 15 );
|
|
for (i = 0; i < nSelectors; i++) {
|
|
j = 0;
|
|
while (bsR(1) == 1) j++;
|
|
selectorMtf[i] = j;
|
|
}
|
|
|
|
/*--- Undo the MTF values for the selectors. ---*/
|
|
{
|
|
UChar pos[N_GROUPS], tmp, v;
|
|
for (v = 0; v < nGroups; v++) pos[v] = v;
|
|
|
|
for (i = 0; i < nSelectors; i++) {
|
|
v = selectorMtf[i];
|
|
tmp = pos[v];
|
|
while (v > 0) { pos[v] = pos[v-1]; v--; }
|
|
pos[0] = tmp;
|
|
selector[i] = tmp;
|
|
}
|
|
}
|
|
|
|
/*--- Now the coding tables ---*/
|
|
for (t = 0; t < nGroups; t++) {
|
|
Int32 curr = bsR ( 5 );
|
|
for (i = 0; i < alphaSize; i++) {
|
|
while (bsR(1) == 1) {
|
|
if (bsR(1) == 0) curr++; else curr--;
|
|
}
|
|
len[t][i] = curr;
|
|
}
|
|
}
|
|
|
|
/*--- Create the Huffman decoding tables ---*/
|
|
for (t = 0; t < nGroups; t++) {
|
|
minLen = 32;
|
|
maxLen = 0;
|
|
for (i = 0; i < alphaSize; i++) {
|
|
if (len[t][i] > maxLen) maxLen = len[t][i];
|
|
if (len[t][i] < minLen) minLen = len[t][i];
|
|
}
|
|
hbCreateDecodeTables (
|
|
&limit[t][0], &base[t][0], &perm[t][0], &len[t][0],
|
|
minLen, maxLen, alphaSize
|
|
);
|
|
minLens[t] = minLen;
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Decode the next Huffman-coded symbol into lval.  Expects the
   locals groupNo and groupPos in the calling scope: every G_SIZE
   symbols it advances to the next entry of selector[] to choose
   the coding table.  It then reads minLens[table] bits and keeps
   appending single bits while the code read so far exceeds
   limit[table][length].
--*/
#define GET_MTF_VAL(lval)                         \
{                                                 \
   Int32 zTab, zLen, zCode, zBit;                 \
   if (groupPos == 0) {                           \
      groupNo++;                                  \
      groupPos = G_SIZE;                          \
   }                                              \
   groupPos--;                                    \
   zTab  = selector[groupNo];                     \
   zLen  = minLens[zTab];                         \
   zCode = bsR ( zLen );                          \
   while (zCode > limit[zTab][zLen]) {            \
      zLen++; bsR1(zBit);                         \
      zCode = (zCode << 1) | zBit;                \
   };                                             \
   lval = perm[zTab][zCode - base[zTab][zLen]];   \
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void getAndMoveToFrontDecode ( void )
|
|
{
|
|
UChar yy[256];
|
|
Int32 i, j, nextSym, limitLast;
|
|
Int32 EOB, groupNo, groupPos;
|
|
|
|
limitLast = 100000 * blockSize100k;
|
|
origPtr = bsGetIntVS ( 24 );
|
|
|
|
recvDecodingTables();
|
|
EOB = nInUse+1;
|
|
groupNo = -1;
|
|
groupPos = 0;
|
|
|
|
/*--
|
|
Setting up the unzftab entries here is not strictly
|
|
necessary, but it does save having to do it later
|
|
in a separate pass, and so saves a block's worth of
|
|
cache misses.
|
|
--*/
|
|
for (i = 0; i <= 255; i++) unzftab[i] = 0;
|
|
|
|
for (i = 0; i <= 255; i++) yy[i] = (UChar) i;
|
|
|
|
last = -1;
|
|
|
|
GET_MTF_VAL(nextSym);
|
|
|
|
while (True) {
|
|
|
|
if (nextSym == EOB) break;
|
|
|
|
if (nextSym == RUNA || nextSym == RUNB) {
|
|
UChar ch;
|
|
Int32 s = -1;
|
|
Int32 N = 1;
|
|
do {
|
|
if (nextSym == RUNA) s = s + (0+1) * N; else
|
|
if (nextSym == RUNB) s = s + (1+1) * N;
|
|
N = N * 2;
|
|
GET_MTF_VAL(nextSym);
|
|
}
|
|
while (nextSym == RUNA || nextSym == RUNB);
|
|
|
|
s++;
|
|
ch = seqToUnseq[yy[0]];
|
|
unzftab[ch] += s;
|
|
|
|
if (smallMode)
|
|
while (s > 0) {
|
|
last++;
|
|
ll16[last] = ch;
|
|
s--;
|
|
}
|
|
else
|
|
while (s > 0) {
|
|
last++;
|
|
ll8[last] = ch;
|
|
s--;
|
|
};
|
|
|
|
if (last >= limitLast) blockOverrun();
|
|
continue;
|
|
|
|
} else {
|
|
|
|
UChar tmp;
|
|
last++; if (last >= limitLast) blockOverrun();
|
|
|
|
tmp = yy[nextSym-1];
|
|
unzftab[seqToUnseq[tmp]]++;
|
|
if (smallMode)
|
|
ll16[last] = seqToUnseq[tmp]; else
|
|
ll8[last] = seqToUnseq[tmp];
|
|
|
|
/*--
|
|
This loop is hammered during decompression,
|
|
hence the unrolling.
|
|
|
|
for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
|
|
--*/
|
|
|
|
j = nextSym-1;
|
|
for (; j > 3; j -= 4) {
|
|
yy[j] = yy[j-1];
|
|
yy[j-1] = yy[j-2];
|
|
yy[j-2] = yy[j-3];
|
|
yy[j-3] = yy[j-4];
|
|
}
|
|
for (; j > 0; j--) yy[j] = yy[j-1];
|
|
|
|
yy[0] = tmp;
|
|
GET_MTF_VAL(nextSym);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Block-sorting machinery ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Compare two strings in block. We assume (see
|
|
discussion above) that i1 and i2 have a max
|
|
offset of 10 on entry, and that the first
|
|
bytes of both block and quadrant have been
|
|
copied into the "overshoot area", ie
|
|
into the subscript range
|
|
[last+1 .. last+NUM_OVERSHOOT_BYTES].
|
|
--*/
|
|
/*--
   Returns True iff the rotation of the block starting at i1
   sorts strictly after the one starting at i2.

   The first six positions are compared on block[] alone.  After
   that, positions are compared four at a time on block[] and then
   quadrant[], wrapping i1/i2 back into 0 .. last after each group
   of four, and counting one unit of workDone per group.  If
   last+1 positions go by without a difference the rotations are
   treated as equal and False is returned.
--*/
INLINE Bool fullGtU ( Int32 i1, Int32 i2 )
{
   Int32  k, u;
   UChar  c1, c2;
   UInt16 s1, s2;

#if DEBUG
   /*--
      shellsort shouldn't ask to compare
      something with itself.
   --*/
   assert (i1 != i2);
#endif

   /*-- six leading byte-only comparisons --*/
   for (k = 0; k < 6; k++) {
      c1 = block[i1];
      c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      i1++; i2++;
   }

   k = last + 1;

   do {

      for (u = 0; u < 4; u++) {
         c1 = block[i1];
         c2 = block[i2];
         if (c1 != c2) return (c1 > c2);
         s1 = quadrant[i1];
         s2 = quadrant[i2];
         if (s1 != s2) return (s1 > s2);
         i1++; i2++;
      }

      /*-- wrap around the end of the block --*/
      if (i1 > last) { i1 -= last; i1--; }
      if (i2 > last) { i2 -= last; i2--; }

      k -= 4;
      workDone++;
   }
   while (k >= 0);

   return False;
}
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
Knuth's increments seem to work better
|
|
than Incerpi-Sedgewick here. Possibly
|
|
because the number of elems to sort is
|
|
usually small, typically <= 20.
|
|
--*/
|
|
/*-- Shell sort gaps for simpleSort(); each entry is 3 * previous + 1. --*/
Int32 incs[14] = {
         1,       4,      13,      40,
       121,     364,    1093,    3280,
      9841,   29524,   88573,  265720,
    797161, 2391484
};
|
|
|
|
/*--
   Shell sort zptr[lo .. hi] using the gaps in incs[], ordering
   entries with fullGtU() applied to (zptr[x] + d).  After every
   third insertion within a gap pass the work budget is checked:
   if workDone has passed workLimit during the first attempt, the
   routine returns immediately and leaves the range partly sorted.
--*/
void simpleSort ( Int32 lo, Int32 hi, Int32 d )
{
   Int32 i, j, h, bigN, hp;
   Int32 v;
   Int32 sinceCheck;

   bigN = hi - lo + 1;
   if (bigN < 2) return;

   /*-- start from the largest gap smaller than the range --*/
   hp = 0;
   while (incs[hp] < bigN) hp++;
   hp--;

   for (; hp >= 0; hp--) {
      h = incs[hp];
      if (verbosity >= 5)
         fprintf ( stderr, " shell increment %d\n", h );

      sinceCheck = 0;
      for (i = lo + h; i <= hi; i++) {

         /*-- insert zptr[i] into its h-chain --*/
         v = zptr[i];
         j = i;
         while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
            zptr[j] = zptr[j-h];
            j = j - h;
            if (j <= (lo + h - 1)) break;
         }
         zptr[j] = v;

         sinceCheck++;
         if (sinceCheck == 3) {
            sinceCheck = 0;
            if (workDone > workLimit && firstAttempt) return;
         }
      }
   }
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
The following is an implementation of
|
|
an elegant 3-way quicksort for strings,
|
|
described in a paper "Fast Algorithms for
|
|
Sorting and Searching Strings", by Robert
|
|
Sedgewick and Jon L. Bentley.
|
|
--*/
|
|
|
|
/*--
   Exchange two Int32 lvalues.  Wrapped in do { } while (0) so that
   "swap(a, b);" is a single statement and is safe as the body of
   an unbraced if/else.  Both arguments are evaluated twice, so
   they must not have side effects.
--*/
#define swap(lv1, lv2)                            \
   do {                                           \
      Int32 swapTmp_ = (lv1);                     \
      (lv1) = (lv2);                              \
      (lv2) = swapTmp_;                           \
   } while (0)
|
|
|
|
/*--
   Exchange the n entries zptr[p1 .. p1+n-1] with the n entries
   zptr[p2 .. p2+n-1], one pair at a time in ascending order.
   Does nothing when n <= 0.
--*/
INLINE void vswap ( Int32 p1, Int32 p2, Int32 n )
{
   Int32 k;
   for (k = 0; k < n; k++) {
      swap(zptr[p1 + k], zptr[p2 + k]);
   }
}
|
|
|
|
/*-- Return the median of the three bytes a, b and c. --*/
INLINE UChar med3 ( UChar a, UChar b, UChar c )
{
   UChar lower  = a;
   UChar higher = b;

   /*-- order the first pair --*/
   if (lower > higher) { lower = b; higher = a; }

   /*-- higher becomes min(max(a,b), c) --*/
   if (higher > c) higher = c;

   /*-- the median is the larger of the two survivors --*/
   return (lower > higher) ? lower : higher;
}
|
|
|
|
|
|
/*--
   Smaller of a and b.  The whole expansion is parenthesized so the
   macro can be used inside a larger expression.  Each argument may
   be evaluated twice.
--*/
#define min(a,b) (((a) < (b)) ? (a) : (b))
|
|
|
|
typedef
|
|
struct { Int32 ll; Int32 hh; Int32 dd; }
|
|
StackElem;
|
|
|
|
/*--
   Push the job (lz, hz, dz) onto the local arrays stack[] / sp of
   the calling function.  No overflow check is made here.  Written
   as do { } while (0) so "push(...);" is a single statement.
--*/
#define push(lz,hz,dz)                            \
   do {                                           \
      stack[sp].ll = (lz);                        \
      stack[sp].hh = (hz);                        \
      stack[sp].dd = (dz);                        \
      sp++;                                       \
   } while (0)
|
|
|
|
/*--
   Pop the most recently pushed job from stack[] / sp of the calling
   function into the lvalues lz, hz, dz.  The caller must ensure
   sp > 0.  Written as do { } while (0) so "pop(...);" is a single
   statement.
--*/
#define pop(lz,hz,dz)                             \
   do {                                           \
      sp--;                                       \
      (lz) = stack[sp].ll;                        \
      (hz) = stack[sp].hh;                        \
      (dz) = stack[sp].dd;                        \
   } while (0)
|
|
|
|
/*--
   qSort3() hands a range to simpleSort() when it spans fewer than
   SMALL_THRESH + 1 entries (hi - lo < SMALL_THRESH) or when the
   comparison depth has gone beyond DEPTH_THRESH.
--*/
#define SMALL_THRESH 20
#define DEPTH_THRESH 10
|
|
|
|
/*--
|
|
If you are ever unlucky/improbable enough
|
|
to get a stack overflow whilst sorting,
|
|
increase the following constant and try
|
|
again. In practice I have never seen the
|
|
stack go above 27 elems, so the following
|
|
limit seems very generous.
|
|
--*/
|
|
#define QSORT_STACK_SIZE 1000
|
|
|
|
|
|
/*--
   Sort zptr[loSt .. hiSt], whose entries already agree on their
   first dSt bytes, using a three-way radix quicksort on the byte
   block[zptr[x] + d], driven by an explicit stack of pending jobs.
   Small ranges (hi - lo < SMALL_THRESH) and deep ones
   (d > DEPTH_THRESH) are finished by simpleSort().  Returns early,
   leaving the range partly sorted, when simpleSort() has run over
   the work budget during the first attempt.

   One pass pops one job and pushes at most three, so the stack
   check below requires room for two more entries than are
   currently held before the pass starts.
--*/
void qSort3 ( Int32 loSt, Int32 hiSt, Int32 dSt )
{
   Int32 unLo, unHi, ltLo, gtHi, med, n, m;
   Int32 sp, lo, hi, d;
   StackElem stack[QSORT_STACK_SIZE];

   sp = 0;
   push ( loSt, hiSt, dSt );

   while (sp > 0) {

      /*-- after the pop, up to 3 pushes: sp + 2 entries must fit --*/
      if (sp + 2 > QSORT_STACK_SIZE) panic ( "stack overflow in qSort3" );

      pop ( lo, hi, d );

      if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
         simpleSort ( lo, hi, d );
         if (workDone > workLimit && firstAttempt) return;
         continue;
      }

      med = med3 ( block[zptr[ lo         ]+d],
                   block[zptr[ hi         ]+d],
                   block[zptr[ (lo+hi)>>1 ]+d] );

      unLo = ltLo = lo;
      unHi = gtHi = hi;

      /*--
         Partition.  Entries equal to med are parked at both ends
         ([lo .. ltLo-1] and [gtHi+1 .. hi]); smaller ones collect
         on the left of the unexamined zone, larger on the right.
      --*/
      while (True) {
         while (unLo <= unHi) {
            n = ((Int32)block[zptr[unLo]+d]) - med;
            if (n > 0) break;
            if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; }
            unLo++;
         }
         while (unLo <= unHi) {
            n = ((Int32)block[zptr[unHi]+d]) - med;
            if (n < 0) break;
            if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; }
            unHi--;
         }
         if (unLo > unHi) break;
         swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
      }
#if DEBUG
      assert (unHi == unLo-1);
#endif

      /*-- every entry equalled med: same range, one byte deeper --*/
      if (gtHi < ltLo) {
         push(lo, hi, d+1 );
         continue;
      }

      /*-- move the parked equal entries into the middle --*/
      n = min(ltLo-lo, unLo-ltLo); vswap(lo, unLo-n, n);
      m = min(hi-gtHi, gtHi-unHi); vswap(unLo, hi-m+1, m);

      n = lo + unLo - ltLo - 1;
      m = hi - (gtHi - unHi) + 1;

      push ( lo,  n,   d   );
      push ( n+1, m-1, d+1 );
      push ( m,   hi,  d   );
   }
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
|
|
/*--
   BIGFREQ(b): difference between the ftab[] entries at the start of
   big bucket b+1 and big bucket b.  sortIt() uses it, while ftab[]
   holds the first location of every small bucket, as the size of
   big bucket b.
--*/
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])

/*--
   Bit 21 of an ftab[] entry flags its small bucket as already
   sorted; CLEARMASK strips the flag to recover the location.
--*/
#define SETMASK   (1 << 21)
#define CLEARMASK (~(SETMASK))
|
|
|
|
void sortIt ( void )
|
|
{
|
|
Int32 i, j, ss, sb;
|
|
Int32 runningOrder[256];
|
|
Int32 copy[256];
|
|
Bool bigDone[256];
|
|
UChar c1, c2;
|
|
Int32 numQSorted;
|
|
|
|
/*--
|
|
In the various block-sized structures, live data runs
|
|
from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
|
|
set up the overshoot area for block.
|
|
--*/
|
|
|
|
if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
|
|
for (i = 0; i < NUM_OVERSHOOT_BYTES; i++)
|
|
block[last+i+1] = block[i % (last+1)];
|
|
for (i = 0; i <= last+NUM_OVERSHOOT_BYTES; i++)
|
|
quadrant[i] = 0;
|
|
|
|
block[-1] = block[last];
|
|
|
|
if (last < 4000) {
|
|
|
|
/*--
|
|
Use simpleSort(), since the full sorting mechanism
|
|
has quite a large constant overhead.
|
|
--*/
|
|
if (verbosity >= 4) fprintf ( stderr, " simpleSort ...\n" );
|
|
for (i = 0; i <= last; i++) zptr[i] = i;
|
|
firstAttempt = False;
|
|
workDone = workLimit = 0;
|
|
simpleSort ( 0, last, 0 );
|
|
if (verbosity >= 4) fprintf ( stderr, " simpleSort done.\n" );
|
|
|
|
} else {
|
|
|
|
numQSorted = 0;
|
|
for (i = 0; i <= 255; i++) bigDone[i] = False;
|
|
|
|
if (verbosity >= 4) fprintf ( stderr, " bucket sorting ...\n" );
|
|
|
|
for (i = 0; i <= 65536; i++) ftab[i] = 0;
|
|
|
|
c1 = block[-1];
|
|
for (i = 0; i <= last; i++) {
|
|
c2 = block[i];
|
|
ftab[(c1 << 8) + c2]++;
|
|
c1 = c2;
|
|
}
|
|
|
|
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
|
|
|
|
c1 = block[0];
|
|
for (i = 0; i < last; i++) {
|
|
c2 = block[i+1];
|
|
j = (c1 << 8) + c2;
|
|
c1 = c2;
|
|
ftab[j]--;
|
|
zptr[ftab[j]] = i;
|
|
}
|
|
j = (block[last] << 8) + block[0];
|
|
ftab[j]--;
|
|
zptr[ftab[j]] = last;
|
|
|
|
/*--
|
|
Now ftab contains the first loc of every small bucket.
|
|
Calculate the running order, from smallest to largest
|
|
big bucket.
|
|
--*/
|
|
|
|
for (i = 0; i <= 255; i++) runningOrder[i] = i;
|
|
|
|
{
|
|
Int32 vv;
|
|
Int32 h = 1;
|
|
do h = 3 * h + 1; while (h <= 256);
|
|
do {
|
|
h = h / 3;
|
|
for (i = h; i <= 255; i++) {
|
|
vv = runningOrder[i];
|
|
j = i;
|
|
while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
|
|
runningOrder[j] = runningOrder[j-h];
|
|
j = j - h;
|
|
if (j <= (h - 1)) goto zero;
|
|
}
|
|
zero:
|
|
runningOrder[j] = vv;
|
|
}
|
|
} while (h != 1);
|
|
}
|
|
|
|
/*--
|
|
The main sorting loop.
|
|
--*/
|
|
|
|
for (i = 0; i <= 255; i++) {
|
|
|
|
/*--
|
|
Process big buckets, starting with the least full.
|
|
--*/
|
|
ss = runningOrder[i];
|
|
|
|
/*--
|
|
Complete the big bucket [ss] by quicksorting
|
|
any unsorted small buckets [ss, j]. Hopefully
|
|
previous pointer-scanning phases have already
|
|
completed many of the small buckets [ss, j], so
|
|
we don't have to sort them at all.
|
|
--*/
|
|
for (j = 0; j <= 255; j++) {
|
|
sb = (ss << 8) + j;
|
|
if ( ! (ftab[sb] & SETMASK) ) {
|
|
Int32 lo = ftab[sb] & CLEARMASK;
|
|
Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
|
|
if (hi > lo) {
|
|
if (verbosity >= 4)
|
|
fprintf ( stderr,
|
|
" qsort [0x%x, 0x%x] done %d this %d\n",
|
|
ss, j, numQSorted, hi - lo + 1 );
|
|
qSort3 ( lo, hi, 2 );
|
|
numQSorted += ( hi - lo + 1 );
|
|
if (workDone > workLimit && firstAttempt) return;
|
|
}
|
|
ftab[sb] |= SETMASK;
|
|
}
|
|
}
|
|
|
|
/*--
|
|
The ss big bucket is now done. Record this fact,
|
|
and update the quadrant descriptors. Remember to
|
|
update quadrants in the overshoot area too, if
|
|
necessary. The "if (i < 255)" test merely skips
|
|
this updating for the last bucket processed, since
|
|
updating for the last bucket is pointless.
|
|
--*/
|
|
bigDone[ss] = True;
|
|
|
|
if (i < 255) {
|
|
Int32 bbStart = ftab[ss << 8] & CLEARMASK;
|
|
Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
|
|
Int32 shifts = 0;
|
|
|
|
while ((bbSize >> shifts) > 65534) shifts++;
|
|
|
|
for (j = 0; j < bbSize; j++) {
|
|
Int32 a2update = zptr[bbStart + j];
|
|
UInt16 qVal = (UInt16)(j >> shifts);
|
|
quadrant[a2update] = qVal;
|
|
if (a2update < NUM_OVERSHOOT_BYTES)
|
|
quadrant[a2update + last + 1] = qVal;
|
|
}
|
|
|
|
if (! ( ((bbSize-1) >> shifts) <= 65535 )) panic ( "sortIt" );
|
|
}
|
|
|
|
/*--
|
|
Now scan this big bucket so as to synthesise the
|
|
sorted order for small buckets [t, ss] for all t != ss.
|
|
--*/
|
|
for (j = 0; j <= 255; j++)
|
|
copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
|
|
|
|
for (j = ftab[ss << 8] & CLEARMASK;
|
|
j < (ftab[(ss+1) << 8] & CLEARMASK);
|
|
j++) {
|
|
c1 = block[zptr[j]-1];
|
|
if ( ! bigDone[c1] ) {
|
|
zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
|
|
copy[c1] ++;
|
|
}
|
|
}
|
|
|
|
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
|
|
}
|
|
if (verbosity >= 4)
|
|
fprintf ( stderr, " %d pointers, %d sorted, %d scanned\n",
|
|
last+1, numQSorted, (last+1) - numQSorted );
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Stuff for randomising repetitive blocks ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Fixed table driving block randomisation.  RAND_UPD_MASK loads the
   entries in order, wrapping after 512; each entry is the length of
   a stretch of bytes in which RAND_MASK is 1 for exactly one byte.
   Compressor and decompressor must use identical values.
--*/
Int32 rNums[512] = {
   619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
   985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
   733, 859, 335, 708, 621, 574,  73, 654, 730, 472,
   419, 436, 278, 496, 867, 210, 399, 680, 480,  51,
   878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
   862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
   150, 238,  59, 379, 684, 877, 625, 169, 643, 105,
   170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
    73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
   909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
   641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
   161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
   382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
    98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
   227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
   469,  68, 770, 919, 190, 373, 294, 822, 808, 206,
   184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
   715,  67, 618, 276, 204, 918, 873, 777, 604, 560,
   951, 160, 578, 722,  79, 804,  96, 409, 713, 940,
   652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
   645, 863, 803, 350, 139,  93, 354,  99, 820, 908,
   609, 772, 154, 274, 580, 184,  79, 626, 630, 742,
   653, 282, 762, 623, 680,  81, 927, 626, 789, 125,
   411, 521, 938, 300, 821,  78, 343, 175, 128, 250,
   170, 774, 972, 275, 999, 639, 495,  78, 352, 126,
   857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
   669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
   944, 375, 748,  52, 600, 747, 642, 182, 862,  81,
   344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
   897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
   433, 837, 553, 268, 926, 240, 102, 654, 459,  51,
   686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
   946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
   978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
   680, 879, 194, 572, 640, 724, 926,  56, 204, 700,
   707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
   297,  59,  87, 824, 713, 663, 412, 693, 342, 606,
   134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
   343,  97, 430, 751, 497, 314, 983, 374, 822, 928,
   140, 206,  73, 263, 980, 736, 876, 478, 430, 305,
   170, 514, 364, 692, 829,  82, 855, 953, 676, 246,
   369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
   804, 378, 215, 828, 592, 281, 565, 555, 710,  82,
   896, 831, 547, 261, 524, 462, 293, 465, 502,  56,
   661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
   768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
    61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
   372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
   780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
   920, 176, 193, 713, 857, 265, 203,  50, 668, 108,
   645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
   936, 638
};
|
|
|
|
|
|
/*--
   Randomisation state, declared as locals by the user:
   rNToGo counts down to the next reload, rTPos is the next
   index into rNums[].  Use as "RAND_DECLS;".
--*/
#define RAND_DECLS                                \
   Int32 rNToGo = 0;                              \
   Int32 rTPos  = 0;

/*-- 1 for the byte whose low bit is to be flipped, else 0. --*/
#define RAND_MASK ((rNToGo == 1) ? 1 : 0)

/*--
   Advance the state by one byte; call once before each use of
   RAND_MASK.  When the countdown has reached zero the next entry
   of rNums[] is loaded, with the index wrapping after 512 entries.
   Wrapped in do { } while (0) so that "RAND_UPD_MASK;" is one
   statement even as the body of an unbraced if/else.
--*/
#define RAND_UPD_MASK                             \
   do {                                           \
      if (rNToGo == 0) {                          \
         rNToGo = rNums[rTPos];                   \
         rTPos++; if (rTPos == 512) rTPos = 0;    \
      }                                           \
      rNToGo--;                                   \
   } while (0)
|
|
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- The Reversible Transformation (tm) ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
void randomiseBlock ( void )
|
|
{
|
|
Int32 i;
|
|
RAND_DECLS;
|
|
for (i = 0; i < 256; i++) inUse[i] = False;
|
|
|
|
for (i = 0; i <= last; i++) {
|
|
RAND_UPD_MASK;
|
|
block[i] ^= RAND_MASK;
|
|
inUse[block[i]] = True;
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void doReversibleTransformation ( void )
|
|
{
|
|
Int32 i;
|
|
|
|
if (verbosity >= 2) fprintf ( stderr, "\n" );
|
|
|
|
workLimit = workFactor * last;
|
|
workDone = 0;
|
|
blockRandomised = False;
|
|
firstAttempt = True;
|
|
|
|
sortIt ();
|
|
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, " %d work, %d block, ratio %5.2f\n",
|
|
workDone, last, (float)workDone / (float)(last) );
|
|
|
|
if (workDone > workLimit && firstAttempt) {
|
|
if (verbosity >= 2)
|
|
fprintf ( stderr, " sorting aborted; randomising block\n" );
|
|
randomiseBlock ();
|
|
workLimit = workDone = 0;
|
|
blockRandomised = True;
|
|
firstAttempt = False;
|
|
sortIt();
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, " %d work, %d block, ratio %f\n",
|
|
workDone, last, (float)workDone / (float)(last) );
|
|
}
|
|
|
|
origPtr = -1;
|
|
for (i = 0; i <= last; i++)
|
|
if (zptr[i] == 0)
|
|
{ origPtr = i; break; };
|
|
|
|
if (origPtr == -1) panic ( "doReversibleTransformation" );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
|
|
/*--
   Binary search over cftab[0 .. 256]: narrows the pair (below, above),
   starting from (0, 256), keeping cftab[below] <= indx on one side
   and indx < cftab[above] on the other whenever a comparison moves
   a bound, and returns `below` once the two are adjacent.
--*/
INLINE Int32 indexIntoF ( Int32 indx, Int32 *cftab )
{
   Int32 below = 0;
   Int32 above = 256;
   Int32 mid;

   while (above - below != 1) {
      mid = (below + above) >> 1;
      if (indx >= cftab[mid])
         below = mid;
      else
         above = mid;
   }
   return below;
}
|
|
|
|
|
|
/*--
   Fetch the next output byte in small mode: cccc receives the
   byte that position tPos corresponds to (found via cftab), then
   tPos follows the link stored for it.  Expects tPos and cftab in
   the calling scope.  Wrapped in do { } while (0) so that
   "GET_SMALL(x);" is a single statement.
--*/
#define GET_SMALL(cccc)                           \
   do {                                           \
      cccc = indexIntoF ( tPos, cftab );          \
      tPos = GET_LL(tPos);                        \
   } while (0)
|
|
|
|
|
|
void undoReversibleTransformation_small ( FILE* dst )
|
|
{
|
|
Int32 cftab[257], cftabAlso[257];
|
|
Int32 i, j, tmp, tPos;
|
|
UChar ch;
|
|
|
|
/*--
|
|
We assume here that the global array unzftab will
|
|
already be holding the frequency counts for
|
|
ll8[0 .. last].
|
|
--*/
|
|
|
|
/*-- Set up cftab to facilitate generation of indexIntoF --*/
|
|
cftab[0] = 0;
|
|
for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
|
|
for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
|
|
|
|
/*-- Make a copy of it, used in generation of T --*/
|
|
for (i = 0; i <= 256; i++) cftabAlso[i] = cftab[i];
|
|
|
|
/*-- compute the T vector --*/
|
|
for (i = 0; i <= last; i++) {
|
|
ch = (UChar)ll16[i];
|
|
SET_LL(i, cftabAlso[ch]);
|
|
cftabAlso[ch]++;
|
|
}
|
|
|
|
/*--
|
|
Compute T^(-1) by pointer reversal on T. This is rather
|
|
subtle, in that, if the original block was two or more
|
|
(in general, N) concatenated copies of the same thing,
|
|
the T vector will consist of N cycles, each of length
|
|
blocksize / N, and decoding will involve traversing one
|
|
of these cycles N times. Which particular cycle doesn't
|
|
matter -- they are all equivalent. The tricky part is to
|
|
make sure that the pointer reversal creates a correct
|
|
reversed cycle for us to traverse. So, the code below
|
|
simply reverses whatever cycle origPtr happens to fall into,
|
|
without regard to the cycle length. That gives one reversed
|
|
cycle, which for normal blocks, is the entire block-size long.
|
|
For repeated blocks, it will be interspersed with the other
|
|
N-1 non-reversed cycles. Providing that the F-subscripting
|
|
phase which follows starts at origPtr, all then works ok.
|
|
--*/
|
|
i = origPtr;
|
|
j = GET_LL(i);
|
|
do {
|
|
tmp = GET_LL(j);
|
|
SET_LL(j, i);
|
|
i = j;
|
|
j = tmp;
|
|
}
|
|
while (i != origPtr);
|
|
|
|
/*--
|
|
We recreate the original by subscripting F through T^(-1).
|
|
The run-length-decoder below requires characters incrementally,
|
|
so tPos is set to a starting value, and is updated by
|
|
the GET_SMALL macro.
|
|
--*/
|
|
tPos = origPtr;
|
|
|
|
/*-------------------------------------------------*/
|
|
/*--
|
|
This is pretty much a verbatim copy of the
|
|
run-length decoder present in the distribution
|
|
bzip-0.21; it has to be here to avoid creating
|
|
block[] as an intermediary structure. As in 0.21,
|
|
this code derives from some sent to me by
|
|
Christian von Roques.
|
|
|
|
It allows dst==NULL, so as to support the test (-t)
|
|
option without slowing down the fast decompression
|
|
code.
|
|
--*/
|
|
{
|
|
IntNative retVal;
|
|
Int32 i2, count, chPrev, ch2;
|
|
UInt32 localCrc;
|
|
|
|
count = 0;
|
|
i2 = 0;
|
|
ch2 = 256; /*-- not a char and not EOF --*/
|
|
localCrc = getGlobalCRC();
|
|
|
|
{
|
|
RAND_DECLS;
|
|
while ( i2 <= last ) {
|
|
chPrev = ch2;
|
|
GET_SMALL(ch2);
|
|
if (blockRandomised) {
|
|
RAND_UPD_MASK;
|
|
ch2 ^= (UInt32)RAND_MASK;
|
|
}
|
|
i2++;
|
|
|
|
if (dst)
|
|
retVal = putc ( ch2, dst );
|
|
|
|
UPDATE_CRC ( localCrc, (UChar)ch2 );
|
|
|
|
if (ch2 != chPrev) {
|
|
count = 1;
|
|
} else {
|
|
count++;
|
|
if (count >= 4) {
|
|
Int32 j2;
|
|
UChar z;
|
|
GET_SMALL(z);
|
|
if (blockRandomised) {
|
|
RAND_UPD_MASK;
|
|
z ^= RAND_MASK;
|
|
}
|
|
for (j2 = 0; j2 < (Int32)z; j2++) {
|
|
if (dst) retVal = putc (ch2, dst);
|
|
UPDATE_CRC ( localCrc, (UChar)ch2 );
|
|
}
|
|
i2++;
|
|
count = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
setGlobalCRC ( localCrc );
|
|
}
|
|
/*-- end of the in-line run-length-decoder. --*/
|
|
}
|
|
#undef GET_SMALL
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
|
|
/*--
   GET_FAST(cccc): fetch the next byte of the block being rebuilt.
   Stores ll8[tPos] into cccc and then advances tPos to tt[tPos].
   ll8, tt and tPos must be in scope where the macro is used.

   The body is wrapped in do { } while (0) so that both assignments
   always act as one statement, for instance under an unbraced `if'.
   Use it as a statement:  GET_FAST(x);
--*/
#define GET_FAST(cccc)                  \
   do {                                 \
      (cccc) = ll8[tPos];               \
      tPos = tt[tPos];                  \
   } while (0)
|
|
|
|
|
|
void undoReversibleTransformation_fast ( FILE* dst )
|
|
{
|
|
Int32 cftab[257];
|
|
Int32 i, tPos;
|
|
UChar ch;
|
|
|
|
/*--
|
|
We assume here that the global array unzftab will
|
|
already be holding the frequency counts for
|
|
ll8[0 .. last].
|
|
--*/
|
|
|
|
/*-- Set up cftab to facilitate generation of T^(-1) --*/
|
|
cftab[0] = 0;
|
|
for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
|
|
for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
|
|
|
|
/*-- compute the T^(-1) vector --*/
|
|
for (i = 0; i <= last; i++) {
|
|
ch = (UChar)ll8[i];
|
|
tt[cftab[ch]] = i;
|
|
cftab[ch]++;
|
|
}
|
|
|
|
/*--
|
|
We recreate the original by subscripting L through T^(-1).
|
|
The run-length-decoder below requires characters incrementally,
|
|
so tPos is set to a starting value, and is updated by
|
|
the GET_FAST macro.
|
|
--*/
|
|
tPos = tt[origPtr];
|
|
|
|
/*-------------------------------------------------*/
|
|
/*--
|
|
This is pretty much a verbatim copy of the
|
|
run-length decoder present in the distribution
|
|
bzip-0.21; it has to be here to avoid creating
|
|
block[] as an intermediary structure. As in 0.21,
|
|
this code derives from some sent to me by
|
|
Christian von Roques.
|
|
--*/
|
|
{
|
|
IntNative retVal;
|
|
Int32 i2, count, chPrev, ch2;
|
|
UInt32 localCrc;
|
|
|
|
count = 0;
|
|
i2 = 0;
|
|
ch2 = 256; /*-- not a char and not EOF --*/
|
|
localCrc = getGlobalCRC();
|
|
|
|
if (blockRandomised) {
|
|
RAND_DECLS;
|
|
while ( i2 <= last ) {
|
|
chPrev = ch2;
|
|
GET_FAST(ch2);
|
|
RAND_UPD_MASK;
|
|
ch2 ^= (UInt32)RAND_MASK;
|
|
i2++;
|
|
|
|
retVal = putc ( ch2, dst );
|
|
UPDATE_CRC ( localCrc, (UChar)ch2 );
|
|
|
|
if (ch2 != chPrev) {
|
|
count = 1;
|
|
} else {
|
|
count++;
|
|
if (count >= 4) {
|
|
Int32 j2;
|
|
UChar z;
|
|
GET_FAST(z);
|
|
RAND_UPD_MASK;
|
|
z ^= RAND_MASK;
|
|
for (j2 = 0; j2 < (Int32)z; j2++) {
|
|
retVal = putc (ch2, dst);
|
|
UPDATE_CRC ( localCrc, (UChar)ch2 );
|
|
}
|
|
i2++;
|
|
count = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
} else {
|
|
|
|
while ( i2 <= last ) {
|
|
chPrev = ch2;
|
|
GET_FAST(ch2);
|
|
i2++;
|
|
|
|
retVal = putc ( ch2, dst );
|
|
UPDATE_CRC ( localCrc, (UChar)ch2 );
|
|
|
|
if (ch2 != chPrev) {
|
|
count = 1;
|
|
} else {
|
|
count++;
|
|
if (count >= 4) {
|
|
Int32 j2;
|
|
UChar z;
|
|
GET_FAST(z);
|
|
for (j2 = 0; j2 < (Int32)z; j2++) {
|
|
retVal = putc (ch2, dst);
|
|
UPDATE_CRC ( localCrc, (UChar)ch2 );
|
|
}
|
|
i2++;
|
|
count = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
} /*-- if (blockRandomised) --*/
|
|
|
|
setGlobalCRC ( localCrc );
|
|
}
|
|
/*-- end of the in-line run-length-decoder. --*/
|
|
}
|
|
#undef GET_FAST
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- The block loader and RLEr ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
/* Top 16: run length, 1 to 255.
|
|
* Lower 16: the char, or MY_EOF for EOF.
|
|
*/
|
|
|
|
#define MY_EOF 257
|
|
|
|
INLINE Int32 getRLEpair ( FILE* src )
|
|
{
|
|
Int32 runLength;
|
|
IntNative ch, chLatest;
|
|
|
|
ch = getc ( src );
|
|
|
|
/*--- Because I have no idea what kind of a value EOF is. ---*/
|
|
if (ch == EOF) {
|
|
ERROR_IF_NOT_ZERO ( ferror(src));
|
|
return (1 << 16) | MY_EOF;
|
|
}
|
|
|
|
runLength = 0;
|
|
do {
|
|
chLatest = getc ( src );
|
|
runLength++;
|
|
bytesIn++;
|
|
}
|
|
while (ch == chLatest && runLength < 255);
|
|
|
|
if ( chLatest != EOF ) {
|
|
if ( ungetc ( chLatest, src ) == EOF )
|
|
panic ( "getRLEpair: ungetc failed" );
|
|
} else {
|
|
ERROR_IF_NOT_ZERO ( ferror(src) );
|
|
}
|
|
|
|
/*--- Conditional is just a speedup hack. ---*/
|
|
if (runLength == 1) {
|
|
UPDATE_CRC ( globalCrc, (UChar)ch );
|
|
return (1 << 16) | ch;
|
|
} else {
|
|
Int32 i;
|
|
for (i = 1; i <= runLength; i++)
|
|
UPDATE_CRC ( globalCrc, (UChar)ch );
|
|
return (runLength << 16) | ch;
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void loadAndRLEsource ( FILE* src )
|
|
{
|
|
Int32 ch, allowableBlockSize, i;
|
|
|
|
last = -1;
|
|
ch = 0;
|
|
|
|
for (i = 0; i < 256; i++) inUse[i] = False;
|
|
|
|
/*--- 20 is just a paranoia constant ---*/
|
|
allowableBlockSize = 100000 * blockSize100k - 20;
|
|
|
|
while (last < allowableBlockSize && ch != MY_EOF) {
|
|
Int32 rlePair, runLen;
|
|
rlePair = getRLEpair ( src );
|
|
ch = rlePair & 0xFFFF;
|
|
runLen = (UInt32)rlePair >> 16;
|
|
|
|
#if DEBUG
|
|
assert (runLen >= 1 && runLen <= 255);
|
|
#endif
|
|
|
|
if (ch != MY_EOF) {
|
|
inUse[ch] = True;
|
|
switch (runLen) {
|
|
case 1:
|
|
last++; block[last] = (UChar)ch; break;
|
|
case 2:
|
|
last++; block[last] = (UChar)ch;
|
|
last++; block[last] = (UChar)ch; break;
|
|
case 3:
|
|
last++; block[last] = (UChar)ch;
|
|
last++; block[last] = (UChar)ch;
|
|
last++; block[last] = (UChar)ch; break;
|
|
default:
|
|
inUse[runLen-4] = True;
|
|
last++; block[last] = (UChar)ch;
|
|
last++; block[last] = (UChar)ch;
|
|
last++; block[last] = (UChar)ch;
|
|
last++; block[last] = (UChar)ch;
|
|
last++; block[last] = (UChar)(runLen-4); break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Processing of complete files and streams ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
void compressStream ( FILE *stream, FILE *zStream )
|
|
{
|
|
IntNative retVal;
|
|
UInt32 blockCRC, combinedCRC;
|
|
Int32 blockNo;
|
|
|
|
blockNo = 0;
|
|
bytesIn = 0;
|
|
bytesOut = 0;
|
|
nBlocksRandomised = 0;
|
|
|
|
SET_BINARY_MODE(stream);
|
|
SET_BINARY_MODE(zStream);
|
|
|
|
ERROR_IF_NOT_ZERO ( ferror(stream) );
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
|
|
bsSetStream ( zStream, True );
|
|
|
|
/*--- Write `magic' bytes B and Z,
|
|
then h indicating file-format == huffmanised,
|
|
followed by a digit indicating blockSize100k.
|
|
---*/
|
|
bsPutUChar ( 'B' );
|
|
bsPutUChar ( 'Z' );
|
|
bsPutUChar ( 'h' );
|
|
bsPutUChar ( '0' + blockSize100k );
|
|
|
|
combinedCRC = 0;
|
|
|
|
if (verbosity >= 2) fprintf ( stderr, "\n" );
|
|
|
|
while (True) {
|
|
|
|
blockNo++;
|
|
initialiseCRC ();
|
|
loadAndRLEsource ( stream );
|
|
ERROR_IF_NOT_ZERO ( ferror(stream) );
|
|
if (last == -1) break;
|
|
|
|
blockCRC = getFinalCRC ();
|
|
combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
|
|
combinedCRC ^= blockCRC;
|
|
|
|
if (verbosity >= 2)
|
|
fprintf ( stderr, " block %d: crc = 0x%8x, combined CRC = 0x%8x, size = %d",
|
|
blockNo, blockCRC, combinedCRC, last+1 );
|
|
|
|
/*-- sort the block and establish posn of original string --*/
|
|
doReversibleTransformation ();
|
|
|
|
/*--
|
|
A 6-byte block header, the value chosen arbitrarily
|
|
as 0x314159265359 :-). A 32 bit value does not really
|
|
give a strong enough guarantee that the value will not
|
|
appear by chance in the compressed datastream. Worst-case
|
|
probability of this event, for a 900k block, is about
|
|
2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
|
|
For a compressed file of size 100Gb -- about 100000 blocks --
|
|
only a 48-bit marker will do. NB: normal compression/
|
|
decompression do *not* rely on these statistical properties.
|
|
They are only important when trying to recover blocks from
|
|
damaged files.
|
|
--*/
|
|
bsPutUChar ( 0x31 ); bsPutUChar ( 0x41 );
|
|
bsPutUChar ( 0x59 ); bsPutUChar ( 0x26 );
|
|
bsPutUChar ( 0x53 ); bsPutUChar ( 0x59 );
|
|
|
|
/*-- Now the block's CRC, so it is in a known place. --*/
|
|
bsPutUInt32 ( blockCRC );
|
|
|
|
/*-- Now a single bit indicating randomisation. --*/
|
|
if (blockRandomised) {
|
|
bsW(1,1); nBlocksRandomised++;
|
|
} else
|
|
bsW(1,0);
|
|
|
|
/*-- Finally, block's contents proper. --*/
|
|
moveToFrontCodeAndSend ();
|
|
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
}
|
|
|
|
if (verbosity >= 2 && nBlocksRandomised > 0)
|
|
fprintf ( stderr, " %d block%s needed randomisation\n",
|
|
nBlocksRandomised,
|
|
nBlocksRandomised == 1 ? "" : "s" );
|
|
|
|
/*--
|
|
Now another magic 48-bit number, 0x177245385090, to
|
|
indicate the end of the last block. (sqrt(pi), if
|
|
you want to know. I did want to use e, but it contains
|
|
too much repetition -- 27 18 28 18 28 46 -- for me
|
|
to feel statistically comfortable. Call me paranoid.)
|
|
--*/
|
|
|
|
bsPutUChar ( 0x17 ); bsPutUChar ( 0x72 );
|
|
bsPutUChar ( 0x45 ); bsPutUChar ( 0x38 );
|
|
bsPutUChar ( 0x50 ); bsPutUChar ( 0x90 );
|
|
|
|
bsPutUInt32 ( combinedCRC );
|
|
if (verbosity >= 2)
|
|
fprintf ( stderr, " final combined CRC = 0x%x\n ", combinedCRC );
|
|
|
|
/*-- Close the files in an utterly paranoid way. --*/
|
|
bsFinishedWithStream ();
|
|
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
retVal = fflush ( zStream );
|
|
ERROR_IF_EOF ( retVal );
|
|
retVal = fclose ( zStream );
|
|
ERROR_IF_EOF ( retVal );
|
|
|
|
ERROR_IF_NOT_ZERO ( ferror(stream) );
|
|
retVal = fclose ( stream );
|
|
ERROR_IF_EOF ( retVal );
|
|
|
|
if (bytesIn == 0) bytesIn = 1;
|
|
if (bytesOut == 0) bytesOut = 1;
|
|
|
|
if (verbosity >= 1)
|
|
fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
|
|
"%5.2f%% saved, %d in, %d out.\n",
|
|
(float)bytesIn / (float)bytesOut,
|
|
(8.0 * (float)bytesOut) / (float)bytesIn,
|
|
100.0 * (1.0 - (float)bytesOut / (float)bytesIn),
|
|
bytesIn,
|
|
bytesOut
|
|
);
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
Bool uncompressStream ( FILE *zStream, FILE *stream )
|
|
{
|
|
UChar magic1, magic2, magic3, magic4;
|
|
UChar magic5, magic6;
|
|
UInt32 storedBlockCRC, storedCombinedCRC;
|
|
UInt32 computedBlockCRC, computedCombinedCRC;
|
|
Int32 currBlockNo;
|
|
IntNative retVal;
|
|
|
|
SET_BINARY_MODE(stream);
|
|
SET_BINARY_MODE(zStream);
|
|
|
|
ERROR_IF_NOT_ZERO ( ferror(stream) );
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
|
|
bsSetStream ( zStream, False );
|
|
|
|
/*--
|
|
A bad magic number is `recoverable from';
|
|
return with False so the caller skips the file.
|
|
--*/
|
|
magic1 = bsGetUChar ();
|
|
magic2 = bsGetUChar ();
|
|
magic3 = bsGetUChar ();
|
|
magic4 = bsGetUChar ();
|
|
if (magic1 != 'B' ||
|
|
magic2 != 'Z' ||
|
|
magic3 != 'h' ||
|
|
magic4 < '1' ||
|
|
magic4 > '9') {
|
|
bsFinishedWithStream();
|
|
retVal = fclose ( stream );
|
|
ERROR_IF_EOF ( retVal );
|
|
return False;
|
|
}
|
|
|
|
setDecompressStructureSizes ( magic4 - '0' );
|
|
computedCombinedCRC = 0;
|
|
|
|
if (verbosity >= 2) fprintf ( stderr, "\n " );
|
|
currBlockNo = 0;
|
|
|
|
while (True) {
|
|
magic1 = bsGetUChar ();
|
|
magic2 = bsGetUChar ();
|
|
magic3 = bsGetUChar ();
|
|
magic4 = bsGetUChar ();
|
|
magic5 = bsGetUChar ();
|
|
magic6 = bsGetUChar ();
|
|
if (magic1 == 0x17 && magic2 == 0x72 &&
|
|
magic3 == 0x45 && magic4 == 0x38 &&
|
|
magic5 == 0x50 && magic6 == 0x90) break;
|
|
|
|
if (magic1 != 0x31 || magic2 != 0x41 ||
|
|
magic3 != 0x59 || magic4 != 0x26 ||
|
|
magic5 != 0x53 || magic6 != 0x59) badBlockHeader();
|
|
|
|
storedBlockCRC = bsGetUInt32 ();
|
|
|
|
if (bsR(1) == 1)
|
|
blockRandomised = True; else
|
|
blockRandomised = False;
|
|
|
|
currBlockNo++;
|
|
if (verbosity >= 2)
|
|
fprintf ( stderr, "[%d: huff+mtf ", currBlockNo );
|
|
getAndMoveToFrontDecode ();
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
|
|
initialiseCRC();
|
|
if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
|
|
if (smallMode)
|
|
undoReversibleTransformation_small ( stream );
|
|
else
|
|
undoReversibleTransformation_fast ( stream );
|
|
|
|
ERROR_IF_NOT_ZERO ( ferror(stream) );
|
|
|
|
computedBlockCRC = getFinalCRC();
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
|
|
if (verbosity >= 2) fprintf ( stderr, "] " );
|
|
|
|
/*-- A bad CRC is considered a fatal error. --*/
|
|
if (storedBlockCRC != computedBlockCRC)
|
|
crcError ( storedBlockCRC, computedBlockCRC );
|
|
|
|
computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
|
|
computedCombinedCRC ^= computedBlockCRC;
|
|
};
|
|
|
|
if (verbosity >= 2) fprintf ( stderr, "\n " );
|
|
|
|
storedCombinedCRC = bsGetUInt32 ();
|
|
if (verbosity >= 2)
|
|
fprintf ( stderr,
|
|
"combined CRCs: stored = 0x%x, computed = 0x%x\n ",
|
|
storedCombinedCRC, computedCombinedCRC );
|
|
if (storedCombinedCRC != computedCombinedCRC)
|
|
crcError ( storedCombinedCRC, computedCombinedCRC );
|
|
|
|
|
|
bsFinishedWithStream ();
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
retVal = fclose ( zStream );
|
|
ERROR_IF_EOF ( retVal );
|
|
|
|
ERROR_IF_NOT_ZERO ( ferror(stream) );
|
|
retVal = fflush ( stream );
|
|
ERROR_IF_NOT_ZERO ( retVal );
|
|
if (stream != stdout) {
|
|
retVal = fclose ( stream );
|
|
ERROR_IF_EOF ( retVal );
|
|
}
|
|
return True;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
Bool testStream ( FILE *zStream )
|
|
{
|
|
UChar magic1, magic2, magic3, magic4;
|
|
UChar magic5, magic6;
|
|
UInt32 storedBlockCRC, storedCombinedCRC;
|
|
UInt32 computedBlockCRC, computedCombinedCRC;
|
|
Int32 currBlockNo;
|
|
IntNative retVal;
|
|
|
|
SET_BINARY_MODE(zStream);
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
|
|
bsSetStream ( zStream, False );
|
|
|
|
magic1 = bsGetUChar ();
|
|
magic2 = bsGetUChar ();
|
|
magic3 = bsGetUChar ();
|
|
magic4 = bsGetUChar ();
|
|
if (magic1 != 'B' ||
|
|
magic2 != 'Z' ||
|
|
magic3 != 'h' ||
|
|
magic4 < '1' ||
|
|
magic4 > '9') {
|
|
bsFinishedWithStream();
|
|
fclose ( zStream );
|
|
fprintf ( stderr, "\n%s: bad magic number (ie, not created by bzip2)\n",
|
|
inName );
|
|
return False;
|
|
}
|
|
|
|
smallMode = True;
|
|
setDecompressStructureSizes ( magic4 - '0' );
|
|
computedCombinedCRC = 0;
|
|
|
|
if (verbosity >= 2) fprintf ( stderr, "\n" );
|
|
currBlockNo = 0;
|
|
|
|
while (True) {
|
|
magic1 = bsGetUChar ();
|
|
magic2 = bsGetUChar ();
|
|
magic3 = bsGetUChar ();
|
|
magic4 = bsGetUChar ();
|
|
magic5 = bsGetUChar ();
|
|
magic6 = bsGetUChar ();
|
|
if (magic1 == 0x17 && magic2 == 0x72 &&
|
|
magic3 == 0x45 && magic4 == 0x38 &&
|
|
magic5 == 0x50 && magic6 == 0x90) break;
|
|
|
|
currBlockNo++;
|
|
if (magic1 != 0x31 || magic2 != 0x41 ||
|
|
magic3 != 0x59 || magic4 != 0x26 ||
|
|
magic5 != 0x53 || magic6 != 0x59) {
|
|
bsFinishedWithStream();
|
|
fclose ( zStream );
|
|
fprintf ( stderr,
|
|
"\n%s, block %d: bad header (not == 0x314159265359)\n",
|
|
inName, currBlockNo );
|
|
return False;
|
|
}
|
|
storedBlockCRC = bsGetUInt32 ();
|
|
|
|
if (bsR(1) == 1)
|
|
blockRandomised = True; else
|
|
blockRandomised = False;
|
|
|
|
if (verbosity >= 2)
|
|
fprintf ( stderr, " block [%d: huff+mtf ", currBlockNo );
|
|
getAndMoveToFrontDecode ();
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
|
|
initialiseCRC();
|
|
if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
|
|
undoReversibleTransformation_small ( NULL );
|
|
|
|
computedBlockCRC = getFinalCRC();
|
|
if (verbosity >= 3)
|
|
fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
|
|
if (verbosity >= 2) fprintf ( stderr, "] " );
|
|
|
|
if (storedBlockCRC != computedBlockCRC) {
|
|
bsFinishedWithStream();
|
|
fclose ( zStream );
|
|
fprintf ( stderr, "\n%s, block %d: computed CRC does not match stored one\n",
|
|
inName, currBlockNo );
|
|
return False;
|
|
}
|
|
|
|
if (verbosity >= 2) fprintf ( stderr, "ok\n" );
|
|
computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
|
|
computedCombinedCRC ^= computedBlockCRC;
|
|
};
|
|
|
|
storedCombinedCRC = bsGetUInt32 ();
|
|
if (verbosity >= 2)
|
|
fprintf ( stderr,
|
|
" combined CRCs: stored = 0x%x, computed = 0x%x\n ",
|
|
storedCombinedCRC, computedCombinedCRC );
|
|
if (storedCombinedCRC != computedCombinedCRC) {
|
|
bsFinishedWithStream();
|
|
fclose ( zStream );
|
|
fprintf ( stderr, "\n%s: computed CRC does not match stored one\n",
|
|
inName );
|
|
return False;
|
|
}
|
|
|
|
bsFinishedWithStream ();
|
|
ERROR_IF_NOT_ZERO ( ferror(zStream) );
|
|
retVal = fclose ( zStream );
|
|
ERROR_IF_EOF ( retVal );
|
|
return True;
|
|
}
|
|
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- Error [non-] handling grunge ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Print, on stderr, the standard advice given after a failure that
   points at a damaged compressed file: how to test such files and
   that bzip2recover may salvage data from them.
--*/
void cadvise ( void )
{
   fprintf ( stderr,
             "\nIt is possible that the compressed file(s) have become corrupted.\n"
             "You can use the -tvv option to test integrity of such files.\n\n"
             "You can use the `bzip2recover' program to *attempt* to recover\n"
             "data from undamaged sections of corrupted files.\n\n" );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void showFileNames ( void )
|
|
{
|
|
fprintf (
|
|
stderr,
|
|
"\tInput file = %s, output file = %s\n",
|
|
inName==NULL ? "(null)" : inName,
|
|
outName==NULL ? "(null)" : outName
|
|
);
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Common exit path after an error: terminate the program with exit
   code ec.

   In file-to-file mode (and not when testing) the partly written
   output file is first closed, if outputHandleJustInCase is set, and
   then removed; a warning is printed when the removal fails.  If
   file names were given and some have not been processed yet, that
   is reported as well.  Does not return.
--*/
void cleanUpAndFail ( Int32 ec )
{
   IntNative rmStatus;

   if ( srcMode == SM_F2F && opMode != OM_TEST ) {
      fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
                progName,
                outName==NULL ? "(null)" : outName );

      /*-- close before removing, in case the handle is still open --*/
      if (outputHandleJustInCase != NULL)
         fclose ( outputHandleJustInCase );

      rmStatus = remove ( outName );
      if (rmStatus != 0)
         fprintf ( stderr,
                   "%s: WARNING: deletion of output file (apparently) failed.\n",
                   progName );
   }

   if (numFileNames > 0 && numFilesProcessed < numFileNames) {
      fprintf ( stderr,
                "%s: WARNING: some files have not been processed:\n"
                "\t%d specified on command line, %d not processed yet.\n\n",
                progName, numFileNames,
                numFileNames - numFilesProcessed );
   }

   exit ( ec );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Report an internal consistency failure described by s, show the
   current file names, and leave through cleanUpAndFail with exit
   code 3.
--*/
void panic ( Char* s )
{
   fprintf ( stderr,
             "\n%s: PANIC -- internal consistency error:\n"
             "\t%s\n"
             "\tThis is a BUG.  Please report it to me at:\n"
             "\tjseward@acm.org\n",
             progName, s );
   showFileNames();
   cleanUpAndFail( 3 );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void badBGLengths ( void )
|
|
{
|
|
fprintf ( stderr,
|
|
"\n%s: error when reading background model code lengths,\n"
|
|
"\twhich probably means the compressed file is corrupted.\n",
|
|
progName );
|
|
showFileNames();
|
|
cadvise();
|
|
cleanUpAndFail( 2 );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Report a CRC mismatch found while decompressing, printing both the
   stored and the computed value, then show the file names and the
   corruption advice and leave through cleanUpAndFail with exit
   code 2.
--*/
void crcError ( UInt32 crcStored, UInt32 crcComputed )
{
   fprintf ( stderr,
             "\n%s: Data integrity error when decompressing.\n"
             "\tStored CRC = 0x%x, computed CRC = 0x%x\n",
             progName, crcStored, crcComputed );
   showFileNames();
   cadvise();
   cleanUpAndFail( 2 );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void compressedStreamEOF ( void )
|
|
{
|
|
fprintf ( stderr,
|
|
"\n%s: Compressed file ends unexpectedly;\n\t"
|
|
"perhaps it is corrupted? *Possible* reason follows.\n",
|
|
progName );
|
|
perror ( progName );
|
|
showFileNames();
|
|
cadvise();
|
|
cleanUpAndFail( 2 );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void ioError ( )
|
|
{
|
|
fprintf ( stderr,
|
|
"\n%s: I/O or other error, bailing out. Possible reason follows.\n",
|
|
progName );
|
|
perror ( progName );
|
|
showFileNames();
|
|
cleanUpAndFail( 1 );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void blockOverrun ()
|
|
{
|
|
fprintf ( stderr,
|
|
"\n%s: block overrun during decompression,\n"
|
|
"\twhich probably means the compressed file\n"
|
|
"\tis corrupted.\n",
|
|
progName );
|
|
showFileNames();
|
|
cadvise();
|
|
cleanUpAndFail( 2 );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void badBlockHeader ()
|
|
{
|
|
fprintf ( stderr,
|
|
"\n%s: bad block header in the compressed file,\n"
|
|
"\twhich probably means it is corrupted.\n",
|
|
progName );
|
|
showFileNames();
|
|
cadvise();
|
|
cleanUpAndFail( 2 );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
void bitStreamEOF ()
|
|
{
|
|
fprintf ( stderr,
|
|
"\n%s: read past the end of compressed data,\n"
|
|
"\twhich probably means it is corrupted.\n",
|
|
progName );
|
|
showFileNames();
|
|
cadvise();
|
|
cleanUpAndFail( 2 );
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Print a "caught, quitting" message and leave through
   cleanUpAndFail with exit code 1.  The argument n is not used.

   NOTE(review): presumably installed as a signal handler (the
   installation is not visible here).  fprintf, and the exit() made
   by cleanUpAndFail, are not async-signal-safe -- confirm whether
   that matters for the signals this is attached to.
--*/
void mySignalCatcher ( IntNative n )
{
   fprintf ( stderr,
             "\n%s: Control-C (or similar) caught, quitting.\n",
             progName );
   cleanUpAndFail(1);
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Report a SIGSEGV or SIGBUS.  When compressing (opMode == OM_Z)
   this is described as a probable bug and the program leaves with
   exit code 3; otherwise it is put down to corrupted compressed
   data, the corruption advice is shown, and the exit code is 2.
   The file names are shown in both cases.  The argument n is not
   used.

   NOTE(review): presumably installed as a signal handler (the
   installation is not visible here); the stdio calls made from it
   are not async-signal-safe -- confirm whether that matters.
--*/
void mySIGSEGVorSIGBUScatcher ( IntNative n )
{
   if (opMode == OM_Z) {
      fprintf ( stderr,
                "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
                "\twhich probably indicates a bug in bzip2.  Please\n"
                "\treport it to me at: jseward@acm.org\n",
                progName );
   } else {
      fprintf ( stderr,
                "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
                "\twhich probably indicates that the compressed data\n"
                "\tis corrupted.\n",
                progName );
   }

   showFileNames();

   if (opMode == OM_Z) {
      cleanUpAndFail( 3 );
   } else {
      cadvise();
      cleanUpAndFail( 2 );
   }
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Report that memory for decompression could not be obtained.
   draw is printed as the number of bytes requested and blockSize as
   the block size it was requested for.  Shows the file names and
   leaves through cleanUpAndFail with exit code 1.
--*/
void uncompressOutOfMemory ( Int32 draw, Int32 blockSize )
{
   fprintf ( stderr,
             "\n%s: Can't allocate enough memory for decompression.\n"
             "\tRequested %d bytes for a block size of %d.\n"
             "\tTry selecting space-economic decompress (with flag -s)\n"
             "\tand failing that, find a machine with more memory.\n",
             progName, draw, blockSize );
   showFileNames();
   cleanUpAndFail(1);
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Report that memory for compression could not be obtained.
   draw is printed as the number of bytes requested and blockSize as
   the block size it was requested for.  Shows the file names and
   leaves through cleanUpAndFail with exit code 1.
--*/
void compressOutOfMemory ( Int32 draw, Int32 blockSize )
{
   fprintf ( stderr,
             "\n%s: Can't allocate enough memory for compression.\n"
             "\tRequested %d bytes for a block size of %d.\n"
             "\tTry selecting a small block size (with flag -s).\n",
             progName, draw, blockSize );
   showFileNames();
   cleanUpAndFail(1);
}
|
|
|
|
|
|
/*---------------------------------------------------*/
|
|
/*--- The main driver machinery ---*/
|
|
/*---------------------------------------------------*/
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Write spaces to stderr so that a name of strlen(s) characters is
   padded out to longestFileName columns.  Writes nothing when s is
   already at least that long.

   The length of s is measured once, up front, rather than on every
   trip round the loop.
--*/
void pad ( Char *s )
{
   Int32 i;
   Int32 shortfall;

   shortfall = longestFileName - (Int32)strlen(s);
   for (i = 1; i <= shortfall; i++)
      fprintf ( stderr, " " );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Return True when name can be opened for binary reading, False
   otherwise.  The probe handle is closed again before returning.
--*/
Bool fileExists ( Char* name )
{
   FILE *probe;

   probe = fopen ( name, "rb" );
   if (probe == NULL) return False;

   fclose ( probe );
   return True;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
|
|
if in doubt, return True
|
|
--*/
|
|
/*--
   Return False only when MY_LSTAT succeeds on name and
   MY_S_IFREG accepts the resulting st_mode; return True in every
   other case, including when MY_LSTAT fails (if in doubt, True).
--*/
Bool notABogStandardFile ( Char* name )
{
   struct MY_STAT statBuf;

   if (MY_LSTAT ( name, &statBuf ) != 0) return True;

   return MY_S_IFREG(statBuf.st_mode) ? False : True;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Give dstName the same mode bits and the same access and
   modification times as srcName.  Each step (MY_LSTAT, chmod, utime)
   is checked with ERROR_IF_NOT_ZERO.  The body is compiled only when
   BZ_UNIX is set; otherwise the function does nothing.
--*/
void copyDateAndPermissions ( Char *srcName, Char *dstName )
{
#if BZ_UNIX
   IntNative      status;
   struct MY_STAT srcInfo;
   struct utimbuf stamps;

   status = MY_LSTAT ( srcName, &srcInfo );
   ERROR_IF_NOT_ZERO ( status );

   /*-- permissions first ... --*/
   status = chmod ( dstName, srcInfo.st_mode );
   ERROR_IF_NOT_ZERO ( status );

   /*-- ... then the two timestamps --*/
   stamps.actime  = srcInfo.st_atime;
   stamps.modtime = srcInfo.st_mtime;
   status = utime ( dstName, &stamps );
   ERROR_IF_NOT_ZERO ( status );
#endif
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Return True when name is longer than four characters and its last
   four characters are ".bz2"; False otherwise.  A name consisting of
   exactly ".bz2" therefore yields False.
--*/
Bool endsInBz2 ( Char* name )
{
   Int32 len = strlen ( name );
   Char  *tail;

   if (len <= 4) return False;

   tail = name + (len - 4);
   return
      (tail[0] == '.' &&
       tail[1] == 'b' &&
       tail[2] == 'z' &&
       tail[3] == '2');
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Return True when name contains a '?' or a '*' anywhere,
   False otherwise (an empty name gives False).
--*/
Bool containsDubiousChars ( Char* name )
{
   Char *p;

   for (p = name; *p != '\0'; p++) {
      if (*p == '?' || *p == '*') return True;
   }
   return False;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Compress one input according to srcMode:
      SM_I2O  stdin  -> stdout   (name is ignored)
      SM_F2O  name   -> stdout
      SM_F2F  name   -> name.bz2
   The input is skipped, with a message on stderr, when its name
   contains `?' or `*', when it does not exist, already ends in
   `.bz2' or is not a regular file, when the output file already
   exists, when compressed data would go to a terminal, or when a
   file cannot be opened.  After a successful file-to-file run the
   date and permissions are copied to the output and, unless
   keepInputFiles is set, the input is removed.

   Any file handle opened here is closed again before an early
   return, so skipping a file does not leave a stream open.

   NOTE(review): name is copied into inName and outName with
   strcpy/strcat; the capacity of those buffers is not visible from
   here -- confirm that the length of name is bounded by the caller.
--*/
void compress ( Char *name )
{
   FILE *inStr;
   FILE *outStr;

   if (name == NULL && srcMode != SM_I2O)
      panic ( "compress: bad modes\n" );

   switch (srcMode) {
      case SM_I2O: strcpy ( inName, "(stdin)" );
                   strcpy ( outName, "(stdout)" ); break;
      case SM_F2F: strcpy ( inName, name );
                   strcpy ( outName, name );
                   strcat ( outName, ".bz2" ); break;
      case SM_F2O: strcpy ( inName, name );
                   strcpy ( outName, "(stdout)" ); break;
   }

   if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
      fprintf ( stderr, "%s: There are no files matching `%s'.\n",
                progName, inName );
      return;
   }
   if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
      fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
                progName, inName );
      return;
   }
   if ( srcMode != SM_I2O && endsInBz2 ( inName )) {
      fprintf ( stderr, "%s: Input file name %s ends in `.bz2', skipping.\n",
                progName, inName );
      return;
   }
   if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
      fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
                progName, inName );
      return;
   }
   if ( srcMode == SM_F2F && fileExists ( outName ) ) {
      fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
                progName, outName );
      return;
   }

   switch ( srcMode ) {

      case SM_I2O:
         inStr = stdin;
         outStr = stdout;
         if ( isatty ( fileno ( stdout ) ) ) {
            fprintf ( stderr,
                      "%s: I won't write compressed data to a terminal.\n",
                      progName );
            fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
                              progName, progName );
            return;
         }
         break;

      case SM_F2O:
         inStr = fopen ( inName, "rb" );
         outStr = stdout;
         if ( isatty ( fileno ( stdout ) ) ) {
            fprintf ( stderr,
                      "%s: I won't write compressed data to a terminal.\n",
                      progName );
            fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
                              progName, progName );
            /*-- the input was opened above; don't leave it open --*/
            if ( inStr != NULL ) fclose ( inStr );
            return;
         }
         if ( inStr == NULL ) {
            fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
                      progName, inName );
            return;
         }
         break;

      case SM_F2F:
         inStr = fopen ( inName, "rb" );
         outStr = fopen ( outName, "wb" );
         if ( outStr == NULL) {
            fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
                      progName, outName );
            /*-- the input may have opened fine; release it --*/
            if ( inStr != NULL ) fclose ( inStr );
            return;
         }
         if ( inStr == NULL ) {
            fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
                      progName, inName );
            /*-- outStr is known to be open here; release it --*/
            fclose ( outStr );
            return;
         }
         break;

      default:
         panic ( "compress: bad srcMode" );
         break;
   }

   if (verbosity >= 1) {
      fprintf ( stderr, " %s: ", inName );
      pad ( inName );
      fflush ( stderr );
   }

   /*--- Now the input and output handles are sane.  Do the Biz. ---*/
   outputHandleJustInCase = outStr;
   compressStream ( inStr, outStr );
   outputHandleJustInCase = NULL;

   /*--- If there was an I/O error, we won't get here. ---*/
   if ( srcMode == SM_F2F ) {
      copyDateAndPermissions ( inName, outName );
      if ( !keepInputFiles ) {
         IntNative retVal = remove ( inName );
         ERROR_IF_NOT_ZERO ( retVal );
      }
   }
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Decompress one input according to srcMode:
      SM_I2O  stdin  -> stdout   (name is ignored)
      SM_F2O  name   -> stdout
      SM_F2F  name   -> name with a trailing `.bz2' removed
   The input is skipped, with a message on stderr, when its name
   contains `?' or `*', when it does not exist, does not end in
   `.bz2' or is not a regular file, when the output file already
   exists, when compressed data would be read from a terminal, or
   when a file cannot be opened.

   If uncompressStream reports a bad magic number, the output file
   created for a file-to-file run is removed again and the input is
   left alone.  Otherwise, for a file-to-file run, the date and
   permissions are copied to the output and, unless keepInputFiles
   is set, the input is removed.

   Any file handle opened here is closed again before an early
   return, so skipping a file does not leave a stream open.

   NOTE(review): name is copied into inName and outName with strcpy;
   the capacity of those buffers is not visible from here -- confirm
   that the length of name is bounded by the caller.
--*/
void uncompress ( Char *name )
{
   FILE *inStr;
   FILE *outStr;
   Bool magicNumberOK;

   if (name == NULL && srcMode != SM_I2O)
      panic ( "uncompress: bad modes\n" );

   switch (srcMode) {
      case SM_I2O: strcpy ( inName, "(stdin)" );
                   strcpy ( outName, "(stdout)" ); break;
      case SM_F2F: strcpy ( inName, name );
                   strcpy ( outName, name );
                   if (endsInBz2 ( outName ))
                      outName [ strlen ( outName ) - 4 ] = '\0';
                   break;
      case SM_F2O: strcpy ( inName, name );
                   strcpy ( outName, "(stdout)" ); break;
   }

   if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
      fprintf ( stderr, "%s: There are no files matching `%s'.\n",
                progName, inName );
      return;
   }
   if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
      fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
                progName, inName );
      return;
   }
   if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
      fprintf ( stderr,
                "%s: Input file name %s doesn't end in `.bz2', skipping.\n",
                progName, inName );
      return;
   }
   if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
      fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
                progName, inName );
      return;
   }
   if ( srcMode == SM_F2F && fileExists ( outName ) ) {
      fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
                progName, outName );
      return;
   }

   switch ( srcMode ) {

      case SM_I2O:
         inStr = stdin;
         outStr = stdout;
         if ( isatty ( fileno ( stdin ) ) ) {
            fprintf ( stderr,
                      "%s: I won't read compressed data from a terminal.\n",
                      progName );
            fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
                              progName, progName );
            return;
         }
         break;

      case SM_F2O:
         inStr = fopen ( inName, "rb" );
         outStr = stdout;
         if ( inStr == NULL ) {
            fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
                      progName, inName );
            return;
         }
         break;

      case SM_F2F:
         inStr = fopen ( inName, "rb" );
         outStr = fopen ( outName, "wb" );
         if ( outStr == NULL) {
            fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
                      progName, outName );
            /*-- the input may have opened fine; release it --*/
            if ( inStr != NULL ) fclose ( inStr );
            return;
         }
         if ( inStr == NULL ) {
            fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
                      progName, inName );
            /*-- outStr is known to be open here; release it --*/
            fclose ( outStr );
            return;
         }
         break;

      default:
         panic ( "uncompress: bad srcMode" );
         break;
   }

   if (verbosity >= 1) {
      fprintf ( stderr, " %s: ", inName );
      pad ( inName );
      fflush ( stderr );
   }

   /*--- Now the input and output handles are sane.  Do the Biz. ---*/
   outputHandleJustInCase = outStr;
   magicNumberOK = uncompressStream ( inStr, outStr );
   outputHandleJustInCase = NULL;

   /*--- If there was an I/O error, we won't get here. ---*/
   if ( magicNumberOK ) {
      if ( srcMode == SM_F2F ) {
         copyDateAndPermissions ( inName, outName );
         if ( !keepInputFiles ) {
            IntNative retVal = remove ( inName );
            ERROR_IF_NOT_ZERO ( retVal );
         }
      }
   } else {
      /*-- not a bzip2 file: discard the empty output we created --*/
      if ( srcMode == SM_F2F ) {
         IntNative retVal = remove ( outName );
         ERROR_IF_NOT_ZERO ( retVal );
      }
   }

   if ( magicNumberOK ) {
      if (verbosity >= 1)
         fprintf ( stderr, "done\n" );
   } else {
      if (verbosity >= 1)
         fprintf ( stderr, "not a bzip2 file, skipping.\n" ); else
         fprintf ( stderr,
                   "%s: %s is not a bzip2 file, skipping.\n",
                   progName, inName );
   }

}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Test the integrity of one compressed input; no output file is written.

   name: the file to test, or NULL to test standard input.  NULL is only
         legal when srcMode is SM_I2O; any other combination panics.

   The input is skipped, with a message on stderr, when its name is too
   long to fit in inName, contains dubious characters, does not exist,
   does not end in `.bz2', is not a normal file, or cannot be opened.
   Standard input is refused when it is a terminal.  Otherwise the stream
   is passed to testStream(); if that reports failure, testFailsExist is
   set to True.
--*/
void testf ( Char *name )
{
   FILE *inStr = NULL;
   Bool  allOK;

   if (name == NULL && srcMode != SM_I2O)
      panic ( "testf: bad modes\n" );

   strcpy ( outName, "(none)" );
   switch (srcMode) {
      case SM_I2O:
         strcpy ( inName, "(stdin)" );
         break;
      case SM_F2F:
      case SM_F2O:
         /*-- The name comes straight from the command line, so make sure
              it fits before copying it into the fixed-size buffer.
              NOTE(review): sizeof assumes inName is declared as an array
              (its declaration is outside this block) -- confirm. --*/
         if ( strlen ( name ) >= sizeof ( inName ) ) {
            fprintf ( stderr,
                      "%s: Input file name `%s' is too long "
                      "(more than %d characters), skipping.\n",
                      progName, name, (int)(sizeof ( inName ) - 1) );
            return;
         }
         strcpy ( inName, name );
         break;
      default:
         /*-- Unknown modes are reported by the second switch below. --*/
         break;
   }

   /*-- Sanity checks that only make sense for a named file. --*/
   if ( srcMode != SM_I2O ) {
      if ( containsDubiousChars ( inName ) ) {
         fprintf ( stderr, "%s: There are no files matching `%s'.\n",
                   progName, inName );
         return;
      }
      if ( !fileExists ( inName ) ) {
         fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
                   progName, inName );
         return;
      }
      if ( !endsInBz2 ( inName ) ) {
         fprintf ( stderr,
                   "%s: Input file name %s doesn't end in `.bz2', skipping.\n",
                   progName, inName );
         return;
      }
      if ( notABogStandardFile ( inName ) ) {
         fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
                   progName, inName );
         return;
      }
   }

   /*-- Obtain the input handle. --*/
   switch ( srcMode ) {

      case SM_I2O:
         if ( isatty ( fileno ( stdin ) ) ) {
            fprintf ( stderr,
                      "%s: I won't read compressed data from a terminal.\n",
                      progName );
            fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
                      progName, progName );
            return;
         }
         inStr = stdin;
         break;

      case SM_F2O:
      case SM_F2F:
         inStr = fopen ( inName, "rb" );
         if ( inStr == NULL ) {
            fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
                      progName, inName );
            return;
         }
         break;

      default:
         panic ( "testf: bad srcMode" );
         break;
   }

   if (verbosity >= 1) {
      fprintf ( stderr, " %s: ", inName );
      pad ( inName );
      fflush ( stderr );
   }

   /*--- Now the input handle is sane.  Do the Biz. ---*/
   /*-- Nothing in this function closes inStr; presumably testStream()
        takes care of that -- TODO confirm. --*/
   allOK = testStream ( inStr );

   if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" );
   if (!allOK) testFailsExist = True;
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Print the program banner, copyright line and GPL notice on stderr.
   The text is fixed; nothing is formatted into it.
--*/
void license ( void )
{
   static const char notice[] =
      "bzip2, a block-sorting file compressor. "
      "Version 0.1pl2, 29-Aug-97.\n"
      " \n"
      " Copyright (C) 1996, 1997 by Julian Seward.\n"
      " \n"
      " This program is free software; you can redistribute it and/or modify\n"
      " it under the terms of the GNU General Public License as published by\n"
      " the Free Software Foundation; either version 2 of the License, or\n"
      " (at your option) any later version.\n"
      " \n"
      " This program is distributed in the hope that it will be useful,\n"
      " but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
      " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
      " GNU General Public License for more details.\n"
      " \n"
      " You should have received a copy of the GNU General Public License\n"
      " along with this program; if not, write to the Free Software\n"
      " Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
      " \n"
      " The GNU General Public License is contained in the file LICENSE.\n"
      " \n";

   fputs ( notice, stderr );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Print the banner, a one-line synopsis and the table of flags on stderr.

   fullProgName: the name shown in the `usage:' line.
--*/
void usage ( Char *fullProgName )
{
   /*-- Banner and synopsis: the only part with a conversion in it. --*/
   fprintf ( stderr,
             "bzip2, a block-sorting file compressor. "
             "Version 0.1pl2, 29-Aug-97.\n"
             "\n usage: %s [flags and input files in any order]\n",
             fullProgName );

   /*-- The flag table and closing notes are fixed text. --*/
   fputs ( "\n"
           " -h --help print this message\n"
           " -d --decompress force decompression\n"
           " -f --compress force compression\n"
           " -t --test test compressed file integrity\n"
           " -c --stdout output to standard out\n"
           " -v --verbose be verbose (a 2nd -v gives more)\n"
           " -k --keep keep (don't delete) input files\n"
           " -L --license display software version & license\n"
           " -V --version display software version & license\n"
           " -s --small use less memory (at most 2500k)\n"
           " -1 .. -9 set block size to 100k .. 900k\n"
           " --repetitive-fast compress repetitive blocks faster\n"
           " --repetitive-best compress repetitive blocks better\n"
           "\n"
           " If invoked as `bzip2', the default action is to compress.\n"
           " as `bunzip2', the default action is to decompress.\n"
           "\n"
           " If no file names are given, bzip2 compresses or decompresses\n"
           " from standard input to standard output. You can combine\n"
           " flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
#if BZ_UNIX
           "\n"
#endif
           , stderr );
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   All the garbage from here to main() is purely to
   implement a linked list of command-line arguments,
   into which main() copies argv[1 .. argc-1].

   The purpose of this ridiculous exercise is to
   facilitate the expansion of wildcard characters
   * and ? in filenames for halfwitted OSs like
   MSDOS, Windows 95 and NT.

   The actual Dirty Work is done by the platform-specific
   macro APPEND_FILESPEC.
--*/
|
|
|
|
typedef
|
|
struct zzzz {
|
|
Char *name;
|
|
struct zzzz *link;
|
|
}
|
|
Cell;
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   malloc() wrapper that never returns NULL.

   n: number of bytes requested.

   Returns the newly allocated memory.  If malloc() fails, a message
   naming the request size is written to stderr and the program exits
   with status 1.
--*/
void *myMalloc ( Int32 n )
{
   void *mem = malloc ( (size_t)n );

   if (mem != NULL)
      return mem;

   fprintf ( stderr,
             "%s: `malloc' failed on request for %d bytes.\n",
             progName, n );
   exit ( 1 );

   return NULL;   /* not reached */
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
Cell *mkCell ( void )
|
|
{
|
|
Cell *c;
|
|
|
|
c = (Cell*) myMalloc ( sizeof ( Cell ) );
|
|
c->name = NULL;
|
|
c->link = NULL;
|
|
return c;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*--
   Append a private copy of `name' to the end of the list.

   root: head of the list, or NULL for an empty list.
   name: NUL-terminated string to copy into the new node.

   Returns the head of the resulting list: the new node if the list was
   empty, otherwise `root' itself.
--*/
Cell *snocString ( Cell *root, Char *name )
{
   Cell *last;
   Cell *node;

   /*-- Walk to the current final node, if there is one. --*/
   last = root;
   if (last != NULL)
      while (last->link != NULL) last = last->link;

   /*-- Build the new node holding its own copy of the string. --*/
   node       = mkCell();
   node->name = (Char*) myMalloc ( 5 + strlen(name) );
   strcpy ( node->name, name );

   if (last == NULL)
      return node;

   last->link = node;
   return root;
}
|
|
|
|
|
|
|
|
/*---------------------------------------------*/
|
|
/*-- True iff the argument stored in the list cell `aa' is exactly the
     string s.  The macro refers to `aa' by name, so a variable called
     `aa' pointing at a cell must be in scope wherever it is used. --*/
#define ISFLAG(s) (strcmp(aa->name, (s))==0)
|
|
|
|
|
|
/*--
   Program entry point.

   In order, it:
     1. checks that the basic integer types have the sizes the code needs;
     2. installs the signal handlers and resets the global state;
     3. records the program name (final path component of argv[0]);
     4. copies argv[1 .. argc-1] into a linked list via APPEND_FILESPEC;
     5. counts the file names (arguments not starting with `-') and
        notes the longest one;
     6. picks default operation and source modes, then lets the
        single-letter and long flags override them;
     7. rejects inconsistent flag combinations;
     8. compresses, decompresses or tests standard input or each file.

   Returns 0 on normal completion.  Exits with status 1 for bad flags,
   bad flag combinations, -h/--help, a misconfigured build or an
   over-long program name, and with status 2 when testing found at
   least one failure.
--*/
IntNative main ( IntNative argc, Char *argv[] )
{
   Int32  i, j;
   Char  *tmp;
   Cell  *argList;
   Cell  *aa;

#if DEBUG
   fprintf ( stderr, "bzip2: *** compiled with debugging ON ***\n" );
#endif

   /*-- Be really really really paranoid :-) --*/
   if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
       sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
       sizeof(Char)  != 1 || sizeof(UChar)  != 1) {
      fprintf ( stderr,
                "bzip2: I'm not configured correctly for this platform!\n"
                "\tI require Int32, Int16 and Char to have sizes\n"
                "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
                "\tProbably you can fix this by defining them correctly,\n"
                "\tand recompiling. Bye!\n" );
      exit(1);
   }

   /*-- Set up signal handlers --*/
   signal (SIGINT,  mySignalCatcher);
   signal (SIGTERM, mySignalCatcher);
   signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
#if BZ_UNIX
   signal (SIGHUP,  mySignalCatcher);
   signal (SIGBUS,  mySIGSEGVorSIGBUScatcher);
#endif

   /*-- Initialise --*/
   outputHandleJustInCase = NULL;
   ftab                   = NULL;
   ll4                    = NULL;
   ll16                   = NULL;
   ll8                    = NULL;
   tt                     = NULL;
   block                  = NULL;
   zptr                   = NULL;
   smallMode              = False;
   keepInputFiles         = False;
   verbosity              = 0;
   blockSize100k          = 9;
   testFailsExist         = False;
   bsStream               = NULL;
   numFileNames           = 0;
   numFilesProcessed      = 0;
   workFactor             = 30;

   strcpy ( inName,  "(none)" );
   strcpy ( outName, "(none)" );

   /*-- Record the program name.  argv[0] is supplied by whoever started
        us: it may be arbitrarily long, and may be NULL when argc is 0,
        so it is checked before being copied into the fixed-size buffer.
        NOTE(review): sizeof assumes progNameReally is declared as an
        array (its declaration is outside this block) -- confirm. --*/
   {
      const Char *self = "bzip2";
      if (argc >= 1 && argv[0] != NULL) self = argv[0];

      if (strlen ( self ) >= sizeof ( progNameReally )) {
         fprintf ( stderr,
                   "bzip2: program name is too long "
                   "(more than %d characters).\n",
                   (int)(sizeof ( progNameReally ) - 1) );
         exit ( 1 );
      }
      strcpy ( progNameReally, self );
   }

   /*-- progName is the part after the last path separator. --*/
   progName = &progNameReally[0];
   for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++)
      if (*tmp == PATH_SEP) progName = tmp + 1;

   /*-- Expand filename wildcards in arg list --*/
   argList = NULL;
   for (i = 1; i <= argc-1; i++)
      APPEND_FILESPEC(argList, argv[i]);

   /*-- Find the length of the longest filename --*/
   longestFileName = 7;
   numFileNames    = 0;
   for (aa = argList; aa != NULL; aa = aa->link)
      if (aa->name[0] != '-') {
         Int32 len = (Int32)strlen(aa->name);
         numFileNames++;
         if (longestFileName < len)
            longestFileName = len;
      }

   /*-- Determine what to do (compress/uncompress/test). --*/
   /*-- Note that subsequent flag handling may change this. --*/
   opMode = OM_Z;

   if ( (strcmp ( "bunzip2",     progName ) == 0) ||
        (strcmp ( "BUNZIP2",     progName ) == 0) ||
        (strcmp ( "bunzip2.exe", progName ) == 0) ||
        (strcmp ( "BUNZIP2.EXE", progName ) == 0) )
      opMode = OM_UNZ;

   /*-- Determine source modes; flag handling may change this too. --*/
   if (numFileNames == 0)
      srcMode = SM_I2O;
   else
      srcMode = SM_F2F;

   /*-- Look at the flags: single-dash arguments, one letter at a time. --*/
   for (aa = argList; aa != NULL; aa = aa->link)
      if (aa->name[0] == '-' && aa->name[1] != '-')
         for (j = 1; aa->name[j] != '\0'; j++)
            switch (aa->name[j]) {
               case 'c': srcMode        = SM_F2O;  break;
               case 'd': opMode         = OM_UNZ;  break;
               case 'f': opMode         = OM_Z;    break;
               case 't': opMode         = OM_TEST; break;
               case 'k': keepInputFiles = True;    break;
               case 's': smallMode      = True;    break;
               case '1': blockSize100k  = 1;       break;
               case '2': blockSize100k  = 2;       break;
               case '3': blockSize100k  = 3;       break;
               case '4': blockSize100k  = 4;       break;
               case '5': blockSize100k  = 5;       break;
               case '6': blockSize100k  = 6;       break;
               case '7': blockSize100k  = 7;       break;
               case '8': blockSize100k  = 8;       break;
               case '9': blockSize100k  = 9;       break;
               case 'V':
               case 'L': license();                break;
               case 'v': verbosity++;              break;
               case 'h': usage ( progName );
                         exit ( 1 );
                         break;
               default:  fprintf ( stderr, "%s: Bad flag `%s'\n",
                                   progName, aa->name );
                         usage ( progName );
                         exit ( 1 );
                         break;
            }

   /*-- And again ... this time for the long (double-dash) flags. --*/
   for (aa = argList; aa != NULL; aa = aa->link) {
      if (ISFLAG("--stdout"))            srcMode        = SM_F2O;  else
      if (ISFLAG("--decompress"))        opMode         = OM_UNZ;  else
      if (ISFLAG("--compress"))          opMode         = OM_Z;    else
      if (ISFLAG("--test"))              opMode         = OM_TEST; else
      if (ISFLAG("--keep"))              keepInputFiles = True;    else
      if (ISFLAG("--small"))             smallMode      = True;    else
      if (ISFLAG("--version"))           license();                else
      if (ISFLAG("--license"))           license();                else
      if (ISFLAG("--repetitive-fast"))   workFactor     = 5;       else
      if (ISFLAG("--repetitive-best"))   workFactor     = 150;     else
      if (ISFLAG("--verbose"))           verbosity++;              else
      if (ISFLAG("--help"))              { usage ( progName ); exit ( 1 ); }
      else
      if (strncmp ( aa->name, "--", 2) == 0) {
         fprintf ( stderr, "%s: Bad flag `%s'\n", progName, aa->name );
         usage ( progName );
         exit ( 1 );
      }
   }

   /*-- Small mode forces the 200k block size when compressing. --*/
   if (opMode == OM_Z && smallMode) blockSize100k = 2;

   /*-- Reject combinations that make no sense. --*/
   if (opMode == OM_Z && srcMode == SM_F2O && numFileNames > 1) {
      fprintf ( stderr, "%s: I won't compress multiple files to stdout.\n",
                progName );
      exit ( 1 );
   }

   if (srcMode == SM_F2O && numFileNames == 0) {
      fprintf ( stderr, "%s: -c expects at least one filename.\n",
                progName );
      exit ( 1 );
   }

   if (opMode == OM_TEST && srcMode == SM_F2O) {
      fprintf ( stderr, "%s: -c and -t cannot be used together.\n",
                progName );
      exit ( 1 );
   }

   if (opMode != OM_Z) blockSize100k = 0;

   /*-- Do the work: stdin when no file names were given, otherwise
        every non-flag argument in command-line order. --*/
   if (opMode == OM_Z) {
      allocateCompressStructures();
      if (srcMode == SM_I2O)
         compress ( NULL );
      else
         for (aa = argList; aa != NULL; aa = aa->link)
            if (aa->name[0] != '-') {
               numFilesProcessed++;
               compress ( aa->name );
            }
   } else
   if (opMode == OM_UNZ) {
      if (srcMode == SM_I2O)
         uncompress ( NULL );
      else
         for (aa = argList; aa != NULL; aa = aa->link)
            if (aa->name[0] != '-') {
               numFilesProcessed++;
               uncompress ( aa->name );
            }
   } else {
      testFailsExist = False;
      if (srcMode == SM_I2O)
         testf ( NULL );
      else
         for (aa = argList; aa != NULL; aa = aa->link)
            if (aa->name[0] != '-') {
               numFilesProcessed++;
               testf ( aa->name );
            }
      if (testFailsExist) {
         fprintf ( stderr,
                   "\n"
                   "You can use the `bzip2recover' program to *attempt* to recover\n"
                   "data from undamaged sections of corrupted files.\n\n"
                 );
         exit(2);
      }
   }
   return 0;
}
|
|
|
|
|
|
/*-----------------------------------------------------------*/
|
|
/*--- end bzip2.c ---*/
|
|
/*-----------------------------------------------------------*/
|