mirror of
https://github.com/python/cpython.git
synced 2025-01-20 23:45:32 +08:00
1112 lines
34 KiB
C
1112 lines
34 KiB
C
/* _lzma - Low-level Python interface to liblzma.
|
|
|
|
Initial implementation by Per Øyvind Karlsen.
|
|
Rewritten by Nadeem Vawda.
|
|
|
|
*/
|
|
|
|
#define PY_SSIZE_T_CLEAN
|
|
|
|
#include "Python.h"
|
|
#include "structmember.h"
|
|
#ifdef WITH_THREAD
|
|
#include "pythread.h"
|
|
#endif
|
|
|
|
#include <stdarg.h>
|
|
#include <string.h>
|
|
|
|
#include <lzma.h>
|
|
|
|
|
|
#ifndef PY_LONG_LONG
|
|
#error "This module requires PY_LONG_LONG to be defined"
|
|
#endif
|
|
|
|
|
|
/* Per-object locking helpers.

   ACQUIRE_LOCK first attempts a non-blocking acquire of obj->lock; only when
   that fails does it wrap a blocking acquire in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. RELEASE_LOCK releases the
   same lock. When WITH_THREAD is not defined both macros expand to nothing. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
#define ACQUIRE_LOCK(obj) do { \
    if (PyThread_acquire_lock((obj)->lock, 0) == 0) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif
|
|
|
|
|
|
/* Container formats accepted by LZMACompressor / LZMADecompressor.
   The numeric values are exported to Python as module constants. */
enum {
    FORMAT_AUTO = 0,
    FORMAT_XZ = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW = 3
};

/* Sentinel one past the largest check ID liblzma defines; used as the
   initial value of Decompressor.check. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
|
|
|
|
|
|
typedef struct {
|
|
PyObject_HEAD
|
|
lzma_stream lzs;
|
|
int flushed;
|
|
#ifdef WITH_THREAD
|
|
PyThread_type_lock lock;
|
|
#endif
|
|
} Compressor;
|
|
|
|
typedef struct {
|
|
PyObject_HEAD
|
|
lzma_stream lzs;
|
|
int check;
|
|
char eof;
|
|
PyObject *unused_data;
|
|
#ifdef WITH_THREAD
|
|
PyThread_type_lock lock;
|
|
#endif
|
|
} Decompressor;
|
|
|
|
/* LZMAError class object: created in PyInit__lzma() as "_lzma.LZMAError" and raised by catch_lzma_error() and the preset checks. */
|
|
static PyObject *Error;
|
|
|
|
/* An empty tuple, used by the filter specifier parsing code: it is passed as the positional-args tuple to PyArg_ParseTupleAndKeywords() so that the filter spec mapping is parsed purely as keyword arguments. */
|
|
static PyObject *empty_tuple;
|
|
|
|
|
|
/* Helper functions. */
|
|
|
|
static int
|
|
catch_lzma_error(lzma_ret lzret)
|
|
{
|
|
switch (lzret) {
|
|
case LZMA_OK:
|
|
case LZMA_GET_CHECK:
|
|
case LZMA_NO_CHECK:
|
|
case LZMA_STREAM_END:
|
|
return 0;
|
|
case LZMA_UNSUPPORTED_CHECK:
|
|
PyErr_SetString(Error, "Unsupported integrity check");
|
|
return 1;
|
|
case LZMA_MEM_ERROR:
|
|
PyErr_NoMemory();
|
|
return 1;
|
|
case LZMA_MEMLIMIT_ERROR:
|
|
PyErr_SetString(Error, "Memory usage limit exceeded");
|
|
return 1;
|
|
case LZMA_FORMAT_ERROR:
|
|
PyErr_SetString(Error, "Input format not supported by decoder");
|
|
return 1;
|
|
case LZMA_OPTIONS_ERROR:
|
|
PyErr_SetString(Error, "Invalid or unsupported options");
|
|
return 1;
|
|
case LZMA_DATA_ERROR:
|
|
PyErr_SetString(Error, "Corrupt input data");
|
|
return 1;
|
|
case LZMA_BUF_ERROR:
|
|
PyErr_SetString(Error, "Insufficient buffer space");
|
|
return 1;
|
|
case LZMA_PROG_ERROR:
|
|
PyErr_SetString(Error, "Internal error");
|
|
return 1;
|
|
default:
|
|
PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/* Initial size of the output buffers allocated by compress() and
   decompress(): BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
|
|
|
|
static int
|
|
grow_buffer(PyObject **buf)
|
|
{
|
|
size_t size = PyBytes_GET_SIZE(*buf);
|
|
return _PyBytes_Resize(buf, size + (size >> 3) + 6);
|
|
}
|
|
|
|
|
|
/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
|
|
since the predefined conversion specifiers do not suit our needs:
|
|
|
|
uint32_t - the "I" (unsigned int) specifier is the right size, but
|
|
silently ignores overflows on conversion.
|
|
|
|
lzma_mode and lzma_match_finder - these are enumeration types, and
|
|
so the size of each is implementation-defined. Worse, different
|
|
enum types can be of different sizes within the same program, so
|
|
to be strictly correct, we need to define two separate converters.
|
|
*/
|
|
|
|
#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
|
|
static int \
|
|
FUNCNAME(PyObject *obj, void *ptr) \
|
|
{ \
|
|
unsigned long val; \
|
|
\
|
|
val = PyLong_AsUnsignedLong(obj); \
|
|
if (PyErr_Occurred()) \
|
|
return 0; \
|
|
if ((unsigned long)(TYPE)val != val) { \
|
|
PyErr_SetString(PyExc_OverflowError, \
|
|
"Value too large for " #TYPE " type"); \
|
|
return 0; \
|
|
} \
|
|
*(TYPE *)ptr = val; \
|
|
return 1; \
|
|
}
|
|
|
|
INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
|
|
INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
|
|
INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
|
|
|
|
#undef INT_TYPE_CONVERTER_FUNC
|
|
|
|
|
|
/* Filter specifier parsing functions. */
|
|
|
|
static void *
|
|
parse_filter_spec_lzma(PyObject *spec)
|
|
{
|
|
static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
|
|
"pb", "mode", "nice_len", "mf", "depth", NULL};
|
|
PyObject *id;
|
|
PyObject *preset_obj;
|
|
uint32_t preset = LZMA_PRESET_DEFAULT;
|
|
lzma_options_lzma *options;
|
|
|
|
/* First, fill in default values for all the options using a preset.
|
|
Then, override the defaults with any values given by the caller. */
|
|
|
|
preset_obj = PyMapping_GetItemString(spec, "preset");
|
|
if (preset_obj == NULL) {
|
|
if (PyErr_ExceptionMatches(PyExc_KeyError))
|
|
PyErr_Clear();
|
|
else
|
|
return NULL;
|
|
} else {
|
|
int ok = uint32_converter(preset_obj, &preset);
|
|
Py_DECREF(preset_obj);
|
|
if (!ok)
|
|
return NULL;
|
|
}
|
|
|
|
options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
|
|
if (options == NULL)
|
|
return PyErr_NoMemory();
|
|
memset(options, 0, sizeof *options);
|
|
|
|
if (lzma_lzma_preset(options, preset)) {
|
|
PyMem_Free(options);
|
|
PyErr_Format(Error, "lzma_lzma_preset() failed for preset %#x", preset);
|
|
return NULL;
|
|
}
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
|
|
"|OOO&O&O&O&O&O&O&O&", optnames,
|
|
&id, &preset_obj,
|
|
uint32_converter, &options->dict_size,
|
|
uint32_converter, &options->lc,
|
|
uint32_converter, &options->lp,
|
|
uint32_converter, &options->pb,
|
|
lzma_mode_converter, &options->mode,
|
|
uint32_converter, &options->nice_len,
|
|
lzma_mf_converter, &options->mf,
|
|
uint32_converter, &options->depth)) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Invalid filter specifier for LZMA filter");
|
|
PyMem_Free(options);
|
|
options = NULL;
|
|
}
|
|
return options;
|
|
}
|
|
|
|
static void *
|
|
parse_filter_spec_delta(PyObject *spec)
|
|
{
|
|
static char *optnames[] = {"id", "dist", NULL};
|
|
PyObject *id;
|
|
uint32_t dist = 1;
|
|
lzma_options_delta *options;
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
|
|
&id, uint32_converter, &dist)) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Invalid filter specifier for delta filter");
|
|
return NULL;
|
|
}
|
|
|
|
options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
|
|
if (options == NULL)
|
|
return PyErr_NoMemory();
|
|
memset(options, 0, sizeof *options);
|
|
options->type = LZMA_DELTA_TYPE_BYTE;
|
|
options->dist = dist;
|
|
return options;
|
|
}
|
|
|
|
static void *
|
|
parse_filter_spec_bcj(PyObject *spec)
|
|
{
|
|
static char *optnames[] = {"id", "start_offset", NULL};
|
|
PyObject *id;
|
|
uint32_t start_offset = 0;
|
|
lzma_options_bcj *options;
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
|
|
&id, uint32_converter, &start_offset)) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Invalid filter specifier for BCJ filter");
|
|
return NULL;
|
|
}
|
|
|
|
options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
|
|
if (options == NULL)
|
|
return PyErr_NoMemory();
|
|
memset(options, 0, sizeof *options);
|
|
options->start_offset = start_offset;
|
|
return options;
|
|
}
|
|
|
|
static void *
|
|
parse_filter_spec(lzma_filter *f, PyObject *spec)
|
|
{
|
|
PyObject *id_obj;
|
|
|
|
if (!PyMapping_Check(spec)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"Filter specifier must be a dict or dict-like object");
|
|
return NULL;
|
|
}
|
|
id_obj = PyMapping_GetItemString(spec, "id");
|
|
if (id_obj == NULL) {
|
|
if (PyErr_ExceptionMatches(PyExc_KeyError))
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Filter specifier must have an \"id\" entry");
|
|
return NULL;
|
|
}
|
|
f->id = PyLong_AsUnsignedLongLong(id_obj);
|
|
Py_DECREF(id_obj);
|
|
if (PyErr_Occurred())
|
|
return NULL;
|
|
|
|
switch (f->id) {
|
|
case LZMA_FILTER_LZMA1:
|
|
case LZMA_FILTER_LZMA2:
|
|
f->options = parse_filter_spec_lzma(spec);
|
|
return f->options;
|
|
case LZMA_FILTER_DELTA:
|
|
f->options = parse_filter_spec_delta(spec);
|
|
return f->options;
|
|
case LZMA_FILTER_X86:
|
|
case LZMA_FILTER_POWERPC:
|
|
case LZMA_FILTER_IA64:
|
|
case LZMA_FILTER_ARM:
|
|
case LZMA_FILTER_ARMTHUMB:
|
|
case LZMA_FILTER_SPARC:
|
|
f->options = parse_filter_spec_bcj(spec);
|
|
return f->options;
|
|
default:
|
|
PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
static void
|
|
free_filter_chain(lzma_filter filters[])
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
|
|
PyMem_Free(filters[i].options);
|
|
}
|
|
|
|
static int
|
|
parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
|
|
{
|
|
Py_ssize_t i, num_filters;
|
|
|
|
num_filters = PySequence_Length(filterspecs);
|
|
if (num_filters == -1)
|
|
return -1;
|
|
if (num_filters > LZMA_FILTERS_MAX) {
|
|
PyErr_Format(PyExc_ValueError,
|
|
"Too many filters - liblzma supports a maximum of %d",
|
|
LZMA_FILTERS_MAX);
|
|
return -1;
|
|
}
|
|
|
|
for (i = 0; i < num_filters; i++) {
|
|
int ok = 1;
|
|
PyObject *spec = PySequence_GetItem(filterspecs, i);
|
|
if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL)
|
|
ok = 0;
|
|
Py_XDECREF(spec);
|
|
if (!ok) {
|
|
filters[i].id = LZMA_VLI_UNKNOWN;
|
|
free_filter_chain(filters);
|
|
return -1;
|
|
}
|
|
}
|
|
filters[num_filters].id = LZMA_VLI_UNKNOWN;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* LZMACompressor class. */
|
|
|
|
static PyObject *
|
|
compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
|
|
{
|
|
size_t data_size = 0;
|
|
PyObject *result;
|
|
|
|
result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
|
|
if (result == NULL)
|
|
return NULL;
|
|
c->lzs.next_in = data;
|
|
c->lzs.avail_in = len;
|
|
c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
|
|
c->lzs.avail_out = PyBytes_GET_SIZE(result);
|
|
for (;;) {
|
|
lzma_ret lzret;
|
|
|
|
Py_BEGIN_ALLOW_THREADS
|
|
lzret = lzma_code(&c->lzs, action);
|
|
data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
|
|
Py_END_ALLOW_THREADS
|
|
if (catch_lzma_error(lzret))
|
|
goto error;
|
|
if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
|
|
(action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
|
|
break;
|
|
} else if (c->lzs.avail_out == 0) {
|
|
if (grow_buffer(&result) == -1)
|
|
goto error;
|
|
c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
|
|
c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
|
|
}
|
|
}
|
|
if (data_size != PyBytes_GET_SIZE(result))
|
|
if (_PyBytes_Resize(&result, data_size) == -1)
|
|
goto error;
|
|
return result;
|
|
|
|
error:
|
|
Py_XDECREF(result);
|
|
return NULL;
|
|
}
|
|
|
|
PyDoc_STRVAR(Compressor_compress_doc,
|
|
"compress(data) -> bytes\n"
|
|
"\n"
|
|
"Provide data to the compressor object. Returns a chunk of\n"
|
|
"compressed data if possible, or b\"\" otherwise.\n"
|
|
"\n"
|
|
"When you have finished providing data to the compressor, call the\n"
|
|
"flush() method to finish the conversion process.\n");
|
|
|
|
static PyObject *
|
|
Compressor_compress(Compressor *self, PyObject *args)
|
|
{
|
|
Py_buffer buffer;
|
|
PyObject *result = NULL;
|
|
|
|
if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
|
|
return NULL;
|
|
|
|
ACQUIRE_LOCK(self);
|
|
if (self->flushed)
|
|
PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
|
|
else
|
|
result = compress(self, buffer.buf, buffer.len, LZMA_RUN);
|
|
RELEASE_LOCK(self);
|
|
PyBuffer_Release(&buffer);
|
|
return result;
|
|
}
|
|
|
|
PyDoc_STRVAR(Compressor_flush_doc,
|
|
"flush() -> bytes\n"
|
|
"\n"
|
|
"Finish the compression process. Returns the compressed data left\n"
|
|
"in internal buffers.\n"
|
|
"\n"
|
|
"The compressor object cannot be used after this method is called.\n");
|
|
|
|
static PyObject *
|
|
Compressor_flush(Compressor *self, PyObject *noargs)
|
|
{
|
|
PyObject *result = NULL;
|
|
|
|
ACQUIRE_LOCK(self);
|
|
if (self->flushed) {
|
|
PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
|
|
} else {
|
|
self->flushed = 1;
|
|
result = compress(self, NULL, 0, LZMA_FINISH);
|
|
}
|
|
RELEASE_LOCK(self);
|
|
return result;
|
|
}
|
|
|
|
static int
|
|
Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
|
|
PyObject *filterspecs)
|
|
{
|
|
lzma_ret lzret;
|
|
|
|
if (filterspecs == Py_None) {
|
|
lzret = lzma_easy_encoder(lzs, preset, check);
|
|
} else {
|
|
lzma_filter filters[LZMA_FILTERS_MAX + 1];
|
|
|
|
if (parse_filter_chain_spec(filters, filterspecs) == -1)
|
|
return -1;
|
|
lzret = lzma_stream_encoder(lzs, filters, check);
|
|
free_filter_chain(filters);
|
|
}
|
|
if (catch_lzma_error(lzret))
|
|
return -1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
|
|
{
|
|
lzma_ret lzret;
|
|
|
|
if (filterspecs == Py_None) {
|
|
lzma_options_lzma options;
|
|
|
|
if (lzma_lzma_preset(&options, preset)) {
|
|
PyErr_Format(Error, "Invalid compression preset: %#x", preset);
|
|
return -1;
|
|
}
|
|
lzret = lzma_alone_encoder(lzs, &options);
|
|
} else {
|
|
lzma_filter filters[LZMA_FILTERS_MAX + 1];
|
|
|
|
if (parse_filter_chain_spec(filters, filterspecs) == -1)
|
|
return -1;
|
|
if (filters[0].id == LZMA_FILTER_LZMA1 &&
|
|
filters[1].id == LZMA_VLI_UNKNOWN) {
|
|
lzret = lzma_alone_encoder(lzs, filters[0].options);
|
|
} else {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Invalid filter chain for FORMAT_ALONE - "
|
|
"must be a single LZMA1 filter");
|
|
lzret = LZMA_PROG_ERROR;
|
|
}
|
|
free_filter_chain(filters);
|
|
}
|
|
if (PyErr_Occurred() || catch_lzma_error(lzret))
|
|
return -1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
|
|
{
|
|
lzma_filter filters[LZMA_FILTERS_MAX + 1];
|
|
lzma_ret lzret;
|
|
|
|
if (filterspecs == Py_None) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Must specify filters for FORMAT_RAW");
|
|
return -1;
|
|
}
|
|
if (parse_filter_chain_spec(filters, filterspecs) == -1)
|
|
return -1;
|
|
lzret = lzma_raw_encoder(lzs, filters);
|
|
free_filter_chain(filters);
|
|
if (catch_lzma_error(lzret))
|
|
return -1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
|
|
{
|
|
static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
|
|
int format = FORMAT_XZ;
|
|
int check = -1;
|
|
uint32_t preset = LZMA_PRESET_DEFAULT;
|
|
PyObject *preset_obj = Py_None;
|
|
PyObject *filterspecs = Py_None;
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
|
|
"|iiOO:LZMACompressor", arg_names,
|
|
&format, &check, &preset_obj,
|
|
&filterspecs))
|
|
return -1;
|
|
|
|
if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Integrity checks are only supported by FORMAT_XZ");
|
|
return -1;
|
|
}
|
|
|
|
if (preset_obj != Py_None && filterspecs != Py_None) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Cannot specify both preset and filter chain");
|
|
return -1;
|
|
}
|
|
|
|
if (preset_obj != Py_None)
|
|
if (!uint32_converter(preset_obj, &preset))
|
|
return -1;
|
|
|
|
#ifdef WITH_THREAD
|
|
self->lock = PyThread_allocate_lock();
|
|
if (self->lock == NULL) {
|
|
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
self->flushed = 0;
|
|
switch (format) {
|
|
case FORMAT_XZ:
|
|
if (check == -1)
|
|
check = LZMA_CHECK_CRC64;
|
|
if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
|
|
break;
|
|
return 0;
|
|
|
|
case FORMAT_ALONE:
|
|
if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
|
|
break;
|
|
return 0;
|
|
|
|
case FORMAT_RAW:
|
|
if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
|
|
break;
|
|
return 0;
|
|
|
|
default:
|
|
PyErr_Format(PyExc_ValueError,
|
|
"Invalid container format: %d", format);
|
|
break;
|
|
}
|
|
|
|
#ifdef WITH_THREAD
|
|
PyThread_free_lock(self->lock);
|
|
self->lock = NULL;
|
|
#endif
|
|
return -1;
|
|
}
|
|
|
|
static void
|
|
Compressor_dealloc(Compressor *self)
|
|
{
|
|
lzma_end(&self->lzs);
|
|
#ifdef WITH_THREAD
|
|
if (self->lock != NULL)
|
|
PyThread_free_lock(self->lock);
|
|
#endif
|
|
Py_TYPE(self)->tp_free((PyObject *)self);
|
|
}
|
|
|
|
static PyMethodDef Compressor_methods[] = {
|
|
{"compress", (PyCFunction)Compressor_compress, METH_VARARGS,
|
|
Compressor_compress_doc},
|
|
{"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
|
|
Compressor_flush_doc},
|
|
{NULL}
|
|
};
|
|
|
|
PyDoc_STRVAR(Compressor_doc,
|
|
"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
|
|
"\n"
|
|
"Create a compressor object for compressing data incrementally.\n"
|
|
"\n"
|
|
"format specifies the container format to use for the output. This can\n"
|
|
"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
|
|
"\n"
|
|
"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
|
|
"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n"
|
|
"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
|
|
"\n"
|
|
"The settings used by the compressor can be specified either as a\n"
|
|
"preset compression level (with the 'preset' argument), or in detail\n"
|
|
"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
|
|
"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
|
|
"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
|
|
"the raw compressor does not support preset compression levels.\n"
|
|
"\n"
|
|
"preset (if provided) should be an integer in the range 0-9, optionally\n"
|
|
"OR-ed with the constant PRESET_EXTREME.\n"
|
|
"\n"
|
|
"filters (if provided) should be a sequence of dicts. Each dict should\n"
|
|
"have an entry for \"id\" indicating the ID of the filter, plus\n"
|
|
"additional entries for options to the filter.\n"
|
|
"\n"
|
|
"For one-shot compression, use the compress() function instead.\n");
|
|
|
|
static PyTypeObject Compressor_type = {
|
|
PyVarObject_HEAD_INIT(NULL, 0)
|
|
"_lzma.LZMACompressor", /* tp_name */
|
|
sizeof(Compressor), /* tp_basicsize */
|
|
0, /* tp_itemsize */
|
|
(destructor)Compressor_dealloc, /* tp_dealloc */
|
|
0, /* tp_print */
|
|
0, /* tp_getattr */
|
|
0, /* tp_setattr */
|
|
0, /* tp_reserved */
|
|
0, /* tp_repr */
|
|
0, /* tp_as_number */
|
|
0, /* tp_as_sequence */
|
|
0, /* tp_as_mapping */
|
|
0, /* tp_hash */
|
|
0, /* tp_call */
|
|
0, /* tp_str */
|
|
0, /* tp_getattro */
|
|
0, /* tp_setattro */
|
|
0, /* tp_as_buffer */
|
|
Py_TPFLAGS_DEFAULT, /* tp_flags */
|
|
Compressor_doc, /* tp_doc */
|
|
0, /* tp_traverse */
|
|
0, /* tp_clear */
|
|
0, /* tp_richcompare */
|
|
0, /* tp_weaklistoffset */
|
|
0, /* tp_iter */
|
|
0, /* tp_iternext */
|
|
Compressor_methods, /* tp_methods */
|
|
0, /* tp_members */
|
|
0, /* tp_getset */
|
|
0, /* tp_base */
|
|
0, /* tp_dict */
|
|
0, /* tp_descr_get */
|
|
0, /* tp_descr_set */
|
|
0, /* tp_dictoffset */
|
|
(initproc)Compressor_init, /* tp_init */
|
|
0, /* tp_alloc */
|
|
PyType_GenericNew, /* tp_new */
|
|
};
|
|
|
|
|
|
/* LZMADecompressor class. */
|
|
|
|
static PyObject *
|
|
decompress(Decompressor *d, uint8_t *data, size_t len)
|
|
{
|
|
size_t data_size = 0;
|
|
PyObject *result;
|
|
|
|
result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
|
|
if (result == NULL)
|
|
return NULL;
|
|
d->lzs.next_in = data;
|
|
d->lzs.avail_in = len;
|
|
d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
|
|
d->lzs.avail_out = PyBytes_GET_SIZE(result);
|
|
for (;;) {
|
|
lzma_ret lzret;
|
|
|
|
Py_BEGIN_ALLOW_THREADS
|
|
lzret = lzma_code(&d->lzs, LZMA_RUN);
|
|
data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
|
|
Py_END_ALLOW_THREADS
|
|
if (catch_lzma_error(lzret))
|
|
goto error;
|
|
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
|
|
d->check = lzma_get_check(&d->lzs);
|
|
if (lzret == LZMA_STREAM_END) {
|
|
d->eof = 1;
|
|
if (d->lzs.avail_in > 0) {
|
|
Py_CLEAR(d->unused_data);
|
|
d->unused_data = PyBytes_FromStringAndSize(
|
|
(char *)d->lzs.next_in, d->lzs.avail_in);
|
|
if (d->unused_data == NULL)
|
|
goto error;
|
|
}
|
|
break;
|
|
} else if (d->lzs.avail_in == 0) {
|
|
break;
|
|
} else if (d->lzs.avail_out == 0) {
|
|
if (grow_buffer(&result) == -1)
|
|
goto error;
|
|
d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
|
|
d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
|
|
}
|
|
}
|
|
if (data_size != PyBytes_GET_SIZE(result))
|
|
if (_PyBytes_Resize(&result, data_size) == -1)
|
|
goto error;
|
|
return result;
|
|
|
|
error:
|
|
Py_XDECREF(result);
|
|
return NULL;
|
|
}
|
|
|
|
PyDoc_STRVAR(Decompressor_decompress_doc,
|
|
"decompress(data) -> bytes\n"
|
|
"\n"
|
|
"Provide data to the decompressor object. Returns a chunk of\n"
|
|
"decompressed data if possible, or b\"\" otherwise.\n"
|
|
"\n"
|
|
"Attempting to decompress data after the end of the stream is\n"
|
|
"reached raises an EOFError. Any data found after the end of the\n"
|
|
"stream is ignored, and saved in the unused_data attribute.\n");
|
|
|
|
static PyObject *
|
|
Decompressor_decompress(Decompressor *self, PyObject *args)
|
|
{
|
|
Py_buffer buffer;
|
|
PyObject *result = NULL;
|
|
|
|
if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
|
|
return NULL;
|
|
|
|
ACQUIRE_LOCK(self);
|
|
if (self->eof)
|
|
PyErr_SetString(PyExc_EOFError, "Already at end of stream");
|
|
else
|
|
result = decompress(self, buffer.buf, buffer.len);
|
|
RELEASE_LOCK(self);
|
|
PyBuffer_Release(&buffer);
|
|
return result;
|
|
}
|
|
|
|
static int
|
|
Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
|
|
{
|
|
lzma_filter filters[LZMA_FILTERS_MAX + 1];
|
|
lzma_ret lzret;
|
|
|
|
if (parse_filter_chain_spec(filters, filterspecs) == -1)
|
|
return -1;
|
|
lzret = lzma_raw_decoder(lzs, filters);
|
|
free_filter_chain(filters);
|
|
if (catch_lzma_error(lzret))
|
|
return -1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
|
|
{
|
|
static char *arg_names[] = {"format", "memlimit", "filters", NULL};
|
|
const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
|
|
int format = FORMAT_AUTO;
|
|
uint64_t memlimit = UINT64_MAX;
|
|
PyObject *memlimit_obj = Py_None;
|
|
PyObject *filterspecs = Py_None;
|
|
lzma_ret lzret;
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
|
|
"|iOO:LZMADecompressor", arg_names,
|
|
&format, &memlimit_obj, &filterspecs))
|
|
return -1;
|
|
|
|
if (memlimit_obj != Py_None) {
|
|
if (format == FORMAT_RAW) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Cannot specify memory limit with FORMAT_RAW");
|
|
return -1;
|
|
}
|
|
memlimit = PyLong_AsUnsignedLongLong(memlimit_obj);
|
|
if (PyErr_Occurred())
|
|
return -1;
|
|
}
|
|
|
|
if (format == FORMAT_RAW && filterspecs == Py_None) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Must specify filters for FORMAT_RAW");
|
|
return -1;
|
|
} else if (format != FORMAT_RAW && filterspecs != Py_None) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Cannot specify filters except with FORMAT_RAW");
|
|
return -1;
|
|
}
|
|
|
|
#ifdef WITH_THREAD
|
|
self->lock = PyThread_allocate_lock();
|
|
if (self->lock == NULL) {
|
|
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
self->check = LZMA_CHECK_UNKNOWN;
|
|
self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
|
|
if (self->unused_data == NULL)
|
|
goto error;
|
|
|
|
switch (format) {
|
|
case FORMAT_AUTO:
|
|
lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags);
|
|
if (catch_lzma_error(lzret))
|
|
break;
|
|
return 0;
|
|
|
|
case FORMAT_XZ:
|
|
lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags);
|
|
if (catch_lzma_error(lzret))
|
|
break;
|
|
return 0;
|
|
|
|
case FORMAT_ALONE:
|
|
self->check = LZMA_CHECK_NONE;
|
|
lzret = lzma_alone_decoder(&self->lzs, memlimit);
|
|
if (catch_lzma_error(lzret))
|
|
break;
|
|
return 0;
|
|
|
|
case FORMAT_RAW:
|
|
self->check = LZMA_CHECK_NONE;
|
|
if (Decompressor_init_raw(&self->lzs, filterspecs) == -1)
|
|
break;
|
|
return 0;
|
|
|
|
default:
|
|
PyErr_Format(PyExc_ValueError,
|
|
"Invalid container format: %d", format);
|
|
break;
|
|
}
|
|
|
|
error:
|
|
Py_CLEAR(self->unused_data);
|
|
#ifdef WITH_THREAD
|
|
PyThread_free_lock(self->lock);
|
|
self->lock = NULL;
|
|
#endif
|
|
return -1;
|
|
}
|
|
|
|
static void
|
|
Decompressor_dealloc(Decompressor *self)
|
|
{
|
|
lzma_end(&self->lzs);
|
|
Py_CLEAR(self->unused_data);
|
|
#ifdef WITH_THREAD
|
|
if (self->lock != NULL)
|
|
PyThread_free_lock(self->lock);
|
|
#endif
|
|
Py_TYPE(self)->tp_free((PyObject *)self);
|
|
}
|
|
|
|
static PyMethodDef Decompressor_methods[] = {
|
|
{"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS,
|
|
Decompressor_decompress_doc},
|
|
{NULL}
|
|
};
|
|
|
|
PyDoc_STRVAR(Decompressor_check_doc,
|
|
"ID of the integrity check used by the input stream.");
|
|
|
|
PyDoc_STRVAR(Decompressor_eof_doc,
|
|
"True if the end-of-stream marker has been reached.");
|
|
|
|
PyDoc_STRVAR(Decompressor_unused_data_doc,
|
|
"Data found after the end of the compressed stream.");
|
|
|
|
static PyMemberDef Decompressor_members[] = {
|
|
{"check", T_INT, offsetof(Decompressor, check), READONLY,
|
|
Decompressor_check_doc},
|
|
{"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
|
|
Decompressor_eof_doc},
|
|
{"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
|
|
Decompressor_unused_data_doc},
|
|
{NULL}
|
|
};
|
|
|
|
PyDoc_STRVAR(Decompressor_doc,
|
|
"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n"
|
|
"\n"
|
|
"Create a decompressor object for decompressing data incrementally.\n"
|
|
"\n"
|
|
"format specifies the container format of the input stream. If this is\n"
|
|
"FORMAT_AUTO (the default), the decompressor will automatically detect\n"
|
|
"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n"
|
|
"FORMAT_RAW cannot be autodetected.\n"
|
|
"\n"
|
|
"memlimit can be specified to limit the amount of memory used by the\n"
|
|
"decompressor. This will cause decompression to fail if the input\n"
|
|
"cannot be decompressed within the given limit.\n"
|
|
"\n"
|
|
"filters specifies a custom filter chain. This argument is required for\n"
|
|
"FORMAT_RAW, and not accepted with any other format. When provided,\n"
|
|
"this should be a sequence of dicts, each indicating the ID and options\n"
|
|
"for a single filter.\n"
|
|
"\n"
|
|
"For one-shot decompression, use the decompress() function instead.\n");
|
|
|
|
static PyTypeObject Decompressor_type = {
|
|
PyVarObject_HEAD_INIT(NULL, 0)
|
|
"_lzma.LZMADecompressor", /* tp_name */
|
|
sizeof(Decompressor), /* tp_basicsize */
|
|
0, /* tp_itemsize */
|
|
(destructor)Decompressor_dealloc, /* tp_dealloc */
|
|
0, /* tp_print */
|
|
0, /* tp_getattr */
|
|
0, /* tp_setattr */
|
|
0, /* tp_reserved */
|
|
0, /* tp_repr */
|
|
0, /* tp_as_number */
|
|
0, /* tp_as_sequence */
|
|
0, /* tp_as_mapping */
|
|
0, /* tp_hash */
|
|
0, /* tp_call */
|
|
0, /* tp_str */
|
|
0, /* tp_getattro */
|
|
0, /* tp_setattro */
|
|
0, /* tp_as_buffer */
|
|
Py_TPFLAGS_DEFAULT, /* tp_flags */
|
|
Decompressor_doc, /* tp_doc */
|
|
0, /* tp_traverse */
|
|
0, /* tp_clear */
|
|
0, /* tp_richcompare */
|
|
0, /* tp_weaklistoffset */
|
|
0, /* tp_iter */
|
|
0, /* tp_iternext */
|
|
Decompressor_methods, /* tp_methods */
|
|
Decompressor_members, /* tp_members */
|
|
0, /* tp_getset */
|
|
0, /* tp_base */
|
|
0, /* tp_dict */
|
|
0, /* tp_descr_get */
|
|
0, /* tp_descr_set */
|
|
0, /* tp_dictoffset */
|
|
(initproc)Decompressor_init, /* tp_init */
|
|
0, /* tp_alloc */
|
|
PyType_GenericNew, /* tp_new */
|
|
};
|
|
|
|
|
|
/* Module-level functions. */
|
|
|
|
PyDoc_STRVAR(check_is_supported_doc,
|
|
"check_is_supported(check_id) -> bool\n"
|
|
"\n"
|
|
"Test whether the given integrity check is supported.\n"
|
|
"\n"
|
|
"Always returns True for CHECK_NONE and CHECK_CRC32.\n");
|
|
|
|
static PyObject *
|
|
check_is_supported(PyObject *self, PyObject *args)
|
|
{
|
|
int check_id;
|
|
|
|
if (!PyArg_ParseTuple(args, "i:check_is_supported", &check_id))
|
|
return NULL;
|
|
|
|
return PyBool_FromLong(lzma_check_is_supported(check_id));
|
|
}
|
|
|
|
|
|
/* Module initialization. */
|
|
|
|
static PyMethodDef module_methods[] = {
|
|
{"check_is_supported", (PyCFunction)check_is_supported,
|
|
METH_VARARGS, check_is_supported_doc},
|
|
{NULL}
|
|
};
|
|
|
|
static PyModuleDef _lzmamodule = {
|
|
PyModuleDef_HEAD_INIT,
|
|
"_lzma",
|
|
NULL,
|
|
-1,
|
|
module_methods,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
};
|
|
|
|
/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
|
|
would not work correctly on platforms with 32-bit longs. */
|
|
static int
|
|
module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value)
|
|
{
|
|
PyObject *o = PyLong_FromLongLong(value);
|
|
if (o == NULL)
|
|
return -1;
|
|
if (PyModule_AddObject(m, name, o) == 0)
|
|
return 0;
|
|
Py_DECREF(o);
|
|
return -1;
|
|
}
|
|
|
|
#define ADD_INT_PREFIX_MACRO(m, macro) \
|
|
module_add_int_constant(m, #macro, LZMA_ ## macro)
|
|
|
|
PyMODINIT_FUNC
|
|
PyInit__lzma(void)
|
|
{
|
|
PyObject *m;
|
|
|
|
empty_tuple = PyTuple_New(0);
|
|
if (empty_tuple == NULL)
|
|
return NULL;
|
|
|
|
m = PyModule_Create(&_lzmamodule);
|
|
if (m == NULL)
|
|
return NULL;
|
|
|
|
if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
|
|
PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
|
|
PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
|
|
PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
|
|
ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
|
|
return NULL;
|
|
|
|
Error = PyErr_NewExceptionWithDoc(
|
|
"_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
|
|
if (Error == NULL)
|
|
return NULL;
|
|
Py_INCREF(Error);
|
|
if (PyModule_AddObject(m, "LZMAError", Error) == -1)
|
|
return NULL;
|
|
|
|
if (PyType_Ready(&Compressor_type) == -1)
|
|
return NULL;
|
|
Py_INCREF(&Compressor_type);
|
|
if (PyModule_AddObject(m, "LZMACompressor",
|
|
(PyObject *)&Compressor_type) == -1)
|
|
return NULL;
|
|
|
|
if (PyType_Ready(&Decompressor_type) == -1)
|
|
return NULL;
|
|
Py_INCREF(&Decompressor_type);
|
|
if (PyModule_AddObject(m, "LZMADecompressor",
|
|
(PyObject *)&Decompressor_type) == -1)
|
|
return NULL;
|
|
|
|
return m;
|
|
}
|