gh-101006: Improve error handling when reading marshal data (GH-101007)

* EOFError no longer overrides other errors such as MemoryError or OSError at
  the start of the object.
* Raise a more relevant error when a NULL object occurs as a code object
  component.
* Minimize the overhead of calling PyErr_Occurred().
This commit is contained in:
Serhiy Storchaka 2023-06-29 12:22:19 +03:00 committed by GitHub
parent 3fb7c608e5
commit 8bf6904b22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 61 deletions

View File

@ -0,0 +1 @@
Improve error handling when reading :mod:`marshal` data.

View File

@ -751,23 +751,28 @@ r_string(Py_ssize_t n, RFILE *p)
static int
r_byte(RFILE *p)
{
int c = EOF;
if (p->ptr != NULL) {
if (p->ptr < p->end)
c = (unsigned char) *p->ptr++;
return c;
if (p->ptr < p->end) {
return (unsigned char) *p->ptr++;
}
}
if (!p->readable) {
else if (!p->readable) {
assert(p->fp);
c = getc(p->fp);
int c = getc(p->fp);
if (c != EOF) {
return c;
}
}
else {
const char *ptr = r_string(1, p);
if (ptr != NULL)
c = *(const unsigned char *) ptr;
if (ptr != NULL) {
return *(const unsigned char *) ptr;
}
return EOF;
}
return c;
PyErr_SetString(PyExc_EOFError,
"EOF read where not expected");
return EOF;
}
static int
@ -828,10 +833,11 @@ r_PyLong(RFILE *p)
digit d;
n = r_long(p);
if (PyErr_Occurred())
return NULL;
if (n == 0)
return (PyObject *)_PyLong_New(0);
if (n == -1 && PyErr_Occurred()) {
return NULL;
}
if (n < -SIZE32_MAX || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (long size out of range)");
@ -850,10 +856,6 @@ r_PyLong(RFILE *p)
d = 0;
for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
md = r_short(p);
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit;
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
@ -864,10 +866,6 @@ r_PyLong(RFILE *p)
d = 0;
for (j=0; j < shorts_in_top_digit; j++) {
md = r_short(p);
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit;
/* topmost marshal digit should be nonzero */
@ -879,18 +877,17 @@ r_PyLong(RFILE *p)
}
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
}
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
assert(!PyErr_Occurred());
/* top digit should be nonzero, else the resulting PyLong won't be
normalized */
ob->long_value.ob_digit[size-1] = d;
return (PyObject *)ob;
bad_digit:
Py_DECREF(ob);
PyErr_SetString(PyExc_ValueError,
"bad marshal data (digit out of range in long)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (digit out of range in long)");
}
return NULL;
}
@ -913,8 +910,6 @@ r_float_str(RFILE *p)
const char *ptr;
n = r_byte(p);
if (n == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
return -1;
}
ptr = r_string(n, p);
@ -992,8 +987,10 @@ r_object(RFILE *p)
PyObject *retval = NULL;
if (code == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
if (PyErr_ExceptionMatches(PyExc_EOFError)) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
}
return NULL;
}
@ -1040,7 +1037,10 @@ r_object(RFILE *p)
case TYPE_INT:
n = r_long(p);
retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
if (n == -1 && PyErr_Occurred()) {
break;
}
retval = PyLong_FromLong(n);
R_REF(retval);
break;
@ -1106,10 +1106,11 @@ r_object(RFILE *p)
{
const char *ptr;
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (bytes object size out of range)");
}
break;
}
v = PyBytes_FromStringAndSize((char *)NULL, n);
@ -1131,10 +1132,11 @@ r_object(RFILE *p)
/* fall through */
case TYPE_ASCII:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (string size out of range)");
}
break;
}
goto _read_ascii;
@ -1145,8 +1147,6 @@ r_object(RFILE *p)
case TYPE_SHORT_ASCII:
n = r_byte(p);
if (n == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
break;
}
_read_ascii:
@ -1173,10 +1173,11 @@ r_object(RFILE *p)
const char *buffer;
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (string size out of range)");
}
break;
}
if (n != 0) {
@ -1198,16 +1199,18 @@ r_object(RFILE *p)
}
case TYPE_SMALL_TUPLE:
n = (unsigned char) r_byte(p);
if (PyErr_Occurred())
n = r_byte(p);
if (n == EOF) {
break;
}
goto _read_tuple;
case TYPE_TUPLE:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (tuple size out of range)");
}
break;
}
_read_tuple:
@ -1232,10 +1235,11 @@ r_object(RFILE *p)
case TYPE_LIST:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (list size out of range)");
}
break;
}
v = PyList_New(n);
@ -1288,10 +1292,11 @@ r_object(RFILE *p)
case TYPE_SET:
case TYPE_FROZENSET:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (set size out of range)");
}
break;
}
@ -1368,20 +1373,20 @@ r_object(RFILE *p)
/* XXX ignore long->int overflows for now */
argcount = (int)r_long(p);
if (PyErr_Occurred())
if (argcount == -1 && PyErr_Occurred())
goto code_error;
posonlyargcount = (int)r_long(p);
if (PyErr_Occurred()) {
if (posonlyargcount == -1 && PyErr_Occurred()) {
goto code_error;
}
kwonlyargcount = (int)r_long(p);
if (PyErr_Occurred())
if (kwonlyargcount == -1 && PyErr_Occurred())
goto code_error;
stacksize = (int)r_long(p);
if (PyErr_Occurred())
if (stacksize == -1 && PyErr_Occurred())
goto code_error;
flags = (int)r_long(p);
if (PyErr_Occurred())
if (flags == -1 && PyErr_Occurred())
goto code_error;
code = r_object(p);
if (code == NULL)
@ -1454,6 +1459,10 @@ r_object(RFILE *p)
v = r_ref_insert(v, idx, flag, p);
code_error:
if (v == NULL && !PyErr_Occurred()) {
PyErr_SetString(PyExc_TypeError,
"NULL object in marshal data for code object");
}
Py_XDECREF(code);
Py_XDECREF(consts);
Py_XDECREF(names);
@ -1471,9 +1480,10 @@ r_object(RFILE *p)
case TYPE_REF:
n = r_long(p);
if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
if (n == -1 && PyErr_Occurred())
break;
PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (invalid reference)");
}
break;
}
v = PyList_GET_ITEM(p->refs, n);