gh-101006: Improve error handling when reading marshal data (GH-101007)

* EOFError no longer overrides other errors such as MemoryError or OSError at
  the start of the object.
* Raise a more relevant error when a NULL object occurs as a code object
  component.
* Minimize the overhead of calling PyErr_Occurred().
This commit is contained in:
Serhiy Storchaka 2023-06-29 12:22:19 +03:00 committed by GitHub
parent 3fb7c608e5
commit 8bf6904b22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 61 deletions

View File

@ -0,0 +1 @@
Improve error handling when reading :mod:`marshal` data.

View File

@ -751,23 +751,28 @@ r_string(Py_ssize_t n, RFILE *p)
static int static int
r_byte(RFILE *p) r_byte(RFILE *p)
{ {
int c = EOF;
if (p->ptr != NULL) { if (p->ptr != NULL) {
if (p->ptr < p->end) if (p->ptr < p->end) {
c = (unsigned char) *p->ptr++; return (unsigned char) *p->ptr++;
return c; }
} }
if (!p->readable) { else if (!p->readable) {
assert(p->fp); assert(p->fp);
c = getc(p->fp); int c = getc(p->fp);
if (c != EOF) {
return c;
}
} }
else { else {
const char *ptr = r_string(1, p); const char *ptr = r_string(1, p);
if (ptr != NULL) if (ptr != NULL) {
c = *(const unsigned char *) ptr; return *(const unsigned char *) ptr;
}
return EOF;
} }
return c; PyErr_SetString(PyExc_EOFError,
"EOF read where not expected");
return EOF;
} }
static int static int
@ -828,10 +833,11 @@ r_PyLong(RFILE *p)
digit d; digit d;
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
return NULL;
if (n == 0) if (n == 0)
return (PyObject *)_PyLong_New(0); return (PyObject *)_PyLong_New(0);
if (n == -1 && PyErr_Occurred()) {
return NULL;
}
if (n < -SIZE32_MAX || n > SIZE32_MAX) { if (n < -SIZE32_MAX || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"bad marshal data (long size out of range)"); "bad marshal data (long size out of range)");
@ -850,10 +856,6 @@ r_PyLong(RFILE *p)
d = 0; d = 0;
for (j=0; j < PyLong_MARSHAL_RATIO; j++) { for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
md = r_short(p); md = r_short(p);
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
if (md < 0 || md > PyLong_MARSHAL_BASE) if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit; goto bad_digit;
d += (digit)md << j*PyLong_MARSHAL_SHIFT; d += (digit)md << j*PyLong_MARSHAL_SHIFT;
@ -864,10 +866,6 @@ r_PyLong(RFILE *p)
d = 0; d = 0;
for (j=0; j < shorts_in_top_digit; j++) { for (j=0; j < shorts_in_top_digit; j++) {
md = r_short(p); md = r_short(p);
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
if (md < 0 || md > PyLong_MARSHAL_BASE) if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit; goto bad_digit;
/* topmost marshal digit should be nonzero */ /* topmost marshal digit should be nonzero */
@ -879,18 +877,17 @@ r_PyLong(RFILE *p)
} }
d += (digit)md << j*PyLong_MARSHAL_SHIFT; d += (digit)md << j*PyLong_MARSHAL_SHIFT;
} }
if (PyErr_Occurred()) { assert(!PyErr_Occurred());
Py_DECREF(ob);
return NULL;
}
/* top digit should be nonzero, else the resulting PyLong won't be /* top digit should be nonzero, else the resulting PyLong won't be
normalized */ normalized */
ob->long_value.ob_digit[size-1] = d; ob->long_value.ob_digit[size-1] = d;
return (PyObject *)ob; return (PyObject *)ob;
bad_digit: bad_digit:
Py_DECREF(ob); Py_DECREF(ob);
PyErr_SetString(PyExc_ValueError, if (!PyErr_Occurred()) {
"bad marshal data (digit out of range in long)"); PyErr_SetString(PyExc_ValueError,
"bad marshal data (digit out of range in long)");
}
return NULL; return NULL;
} }
@ -913,8 +910,6 @@ r_float_str(RFILE *p)
const char *ptr; const char *ptr;
n = r_byte(p); n = r_byte(p);
if (n == EOF) { if (n == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
return -1; return -1;
} }
ptr = r_string(n, p); ptr = r_string(n, p);
@ -992,8 +987,10 @@ r_object(RFILE *p)
PyObject *retval = NULL; PyObject *retval = NULL;
if (code == EOF) { if (code == EOF) {
PyErr_SetString(PyExc_EOFError, if (PyErr_ExceptionMatches(PyExc_EOFError)) {
"EOF read where object expected"); PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
}
return NULL; return NULL;
} }
@ -1040,7 +1037,10 @@ r_object(RFILE *p)
case TYPE_INT: case TYPE_INT:
n = r_long(p); n = r_long(p);
retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n); if (n == -1 && PyErr_Occurred()) {
break;
}
retval = PyLong_FromLong(n);
R_REF(retval); R_REF(retval);
break; break;
@ -1106,10 +1106,11 @@ r_object(RFILE *p)
{ {
const char *ptr; const char *ptr;
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)"); if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (bytes object size out of range)");
}
break; break;
} }
v = PyBytes_FromStringAndSize((char *)NULL, n); v = PyBytes_FromStringAndSize((char *)NULL, n);
@ -1131,10 +1132,11 @@ r_object(RFILE *p)
/* fall through */ /* fall through */
case TYPE_ASCII: case TYPE_ASCII:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (string size out of range)");
}
break; break;
} }
goto _read_ascii; goto _read_ascii;
@ -1145,8 +1147,6 @@ r_object(RFILE *p)
case TYPE_SHORT_ASCII: case TYPE_SHORT_ASCII:
n = r_byte(p); n = r_byte(p);
if (n == EOF) { if (n == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
break; break;
} }
_read_ascii: _read_ascii:
@ -1173,10 +1173,11 @@ r_object(RFILE *p)
const char *buffer; const char *buffer;
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (string size out of range)");
}
break; break;
} }
if (n != 0) { if (n != 0) {
@ -1198,16 +1199,18 @@ r_object(RFILE *p)
} }
case TYPE_SMALL_TUPLE: case TYPE_SMALL_TUPLE:
n = (unsigned char) r_byte(p); n = r_byte(p);
if (PyErr_Occurred()) if (n == EOF) {
break; break;
}
goto _read_tuple; goto _read_tuple;
case TYPE_TUPLE: case TYPE_TUPLE:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (tuple size out of range)");
}
break; break;
} }
_read_tuple: _read_tuple:
@ -1232,10 +1235,11 @@ r_object(RFILE *p)
case TYPE_LIST: case TYPE_LIST:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (list size out of range)");
}
break; break;
} }
v = PyList_New(n); v = PyList_New(n);
@ -1288,10 +1292,11 @@ r_object(RFILE *p)
case TYPE_SET: case TYPE_SET:
case TYPE_FROZENSET: case TYPE_FROZENSET:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (set size out of range)");
}
break; break;
} }
@ -1368,20 +1373,20 @@ r_object(RFILE *p)
/* XXX ignore long->int overflows for now */ /* XXX ignore long->int overflows for now */
argcount = (int)r_long(p); argcount = (int)r_long(p);
if (PyErr_Occurred()) if (argcount == -1 && PyErr_Occurred())
goto code_error; goto code_error;
posonlyargcount = (int)r_long(p); posonlyargcount = (int)r_long(p);
if (PyErr_Occurred()) { if (posonlyargcount == -1 && PyErr_Occurred()) {
goto code_error; goto code_error;
} }
kwonlyargcount = (int)r_long(p); kwonlyargcount = (int)r_long(p);
if (PyErr_Occurred()) if (kwonlyargcount == -1 && PyErr_Occurred())
goto code_error; goto code_error;
stacksize = (int)r_long(p); stacksize = (int)r_long(p);
if (PyErr_Occurred()) if (stacksize == -1 && PyErr_Occurred())
goto code_error; goto code_error;
flags = (int)r_long(p); flags = (int)r_long(p);
if (PyErr_Occurred()) if (flags == -1 && PyErr_Occurred())
goto code_error; goto code_error;
code = r_object(p); code = r_object(p);
if (code == NULL) if (code == NULL)
@ -1454,6 +1459,10 @@ r_object(RFILE *p)
v = r_ref_insert(v, idx, flag, p); v = r_ref_insert(v, idx, flag, p);
code_error: code_error:
if (v == NULL && !PyErr_Occurred()) {
PyErr_SetString(PyExc_TypeError,
"NULL object in marshal data for code object");
}
Py_XDECREF(code); Py_XDECREF(code);
Py_XDECREF(consts); Py_XDECREF(consts);
Py_XDECREF(names); Py_XDECREF(names);
@ -1471,9 +1480,10 @@ r_object(RFILE *p)
case TYPE_REF: case TYPE_REF:
n = r_long(p); n = r_long(p);
if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
if (n == -1 && PyErr_Occurred()) if (!PyErr_Occurred()) {
break; PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); "bad marshal data (invalid reference)");
}
break; break;
} }
v = PyList_GET_ITEM(p->refs, n); v = PyList_GET_ITEM(p->refs, n);