2019-04-18 05:02:26 +08:00
|
|
|
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
|
2017-09-08 13:51:28 +08:00
|
|
|
|
2019-04-18 05:02:26 +08:00
|
|
|
#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
|
|
|
|
# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
|
2017-09-08 13:51:28 +08:00
|
|
|
#endif
|
|
|
|
|
2008-05-08 22:29:10 +08:00
|
|
|
#include "Python.h"
|
2009-05-02 20:36:44 +08:00
|
|
|
#include "structmember.h"
|
2018-11-01 09:30:36 +08:00
|
|
|
#include "pycore_accu.h"
|
2012-03-22 21:42:18 +08:00
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
|
|
|
|
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
|
|
|
|
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
|
|
|
|
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
|
|
|
|
|
|
|
|
static PyTypeObject PyScannerType;
|
|
|
|
static PyTypeObject PyEncoderType;
|
|
|
|
|
|
|
|
typedef struct _PyScannerObject {
|
|
|
|
PyObject_HEAD
|
2017-07-13 13:52:08 +08:00
|
|
|
signed char strict;
|
2009-05-02 20:36:44 +08:00
|
|
|
PyObject *object_hook;
|
|
|
|
PyObject *object_pairs_hook;
|
|
|
|
PyObject *parse_float;
|
|
|
|
PyObject *parse_int;
|
|
|
|
PyObject *parse_constant;
|
2010-09-05 04:16:53 +08:00
|
|
|
PyObject *memo;
|
2009-05-02 20:36:44 +08:00
|
|
|
} PyScannerObject;
|
|
|
|
|
|
|
|
static PyMemberDef scanner_members[] = {
|
2017-05-28 20:31:49 +08:00
|
|
|
{"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
|
2009-05-02 20:36:44 +08:00
|
|
|
{"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
|
|
|
|
{"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
|
|
|
|
{"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
|
|
|
|
{"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
|
|
|
|
{"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
|
|
|
|
{NULL}
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct _PyEncoderObject {
|
|
|
|
PyObject_HEAD
|
|
|
|
PyObject *markers;
|
|
|
|
PyObject *defaultfn;
|
|
|
|
PyObject *encoder;
|
|
|
|
PyObject *indent;
|
|
|
|
PyObject *key_separator;
|
|
|
|
PyObject *item_separator;
|
2017-05-28 20:31:49 +08:00
|
|
|
char sort_keys;
|
|
|
|
char skipkeys;
|
2009-05-02 20:36:44 +08:00
|
|
|
int allow_nan;
|
2017-05-28 20:31:49 +08:00
|
|
|
PyCFunction fast_encode;
|
2009-05-02 20:36:44 +08:00
|
|
|
} PyEncoderObject;
|
|
|
|
|
|
|
|
static PyMemberDef encoder_members[] = {
|
|
|
|
{"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
|
|
|
|
{"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
|
|
|
|
{"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
|
|
|
|
{"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
|
|
|
|
{"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
|
|
|
|
{"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
|
2017-05-28 20:31:49 +08:00
|
|
|
{"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
|
|
|
|
{"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
|
2009-05-02 20:36:44 +08:00
|
|
|
{NULL}
|
|
|
|
};
|
|
|
|
|
2011-08-20 00:03:14 +08:00
|
|
|
/* Forward decls */
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
static PyObject *
|
|
|
|
ascii_escape_unicode(PyObject *pystr);
|
|
|
|
static PyObject *
|
2019-11-05 18:44:28 +08:00
|
|
|
py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
|
2009-05-02 20:36:44 +08:00
|
|
|
void init_json(void);
|
|
|
|
static PyObject *
|
|
|
|
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
|
|
|
|
static PyObject *
|
|
|
|
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
|
|
|
|
static PyObject *
|
|
|
|
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
|
|
|
|
static void
|
|
|
|
scanner_dealloc(PyObject *self);
|
|
|
|
static int
|
|
|
|
scanner_clear(PyObject *self);
|
|
|
|
static PyObject *
|
|
|
|
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
|
|
|
|
static void
|
|
|
|
encoder_dealloc(PyObject *self);
|
|
|
|
static int
|
|
|
|
encoder_clear(PyObject *self);
|
|
|
|
static int
|
2011-10-07 01:09:51 +08:00
|
|
|
encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
|
2009-05-02 20:36:44 +08:00
|
|
|
static int
|
2011-10-07 01:09:51 +08:00
|
|
|
encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
|
2009-05-02 20:36:44 +08:00
|
|
|
static int
|
2011-10-07 01:09:51 +08:00
|
|
|
encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
|
2009-05-02 20:36:44 +08:00
|
|
|
static PyObject *
|
2009-05-02 23:55:19 +08:00
|
|
|
_encoded_const(PyObject *obj);
|
2009-05-02 20:36:44 +08:00
|
|
|
static void
|
2015-12-26 02:01:53 +08:00
|
|
|
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
|
2009-05-02 20:36:44 +08:00
|
|
|
static PyObject *
|
|
|
|
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
|
|
|
|
static PyObject *
|
|
|
|
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
|
2008-05-08 22:29:10 +08:00
|
|
|
|
|
|
|
/* True when c may be copied verbatim into an ASCII-only JSON string:
   printable ASCII (' ' through '~') other than backslash and double quote.
   The argument is parenthesized so compound expressions expand correctly;
   it is still evaluated more than once, so it must have no side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for space, tab, line feed and carriage return.  The argument is
   evaluated more than once. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
|
2008-05-08 22:29:10 +08:00
|
|
|
|
|
|
|
static Py_ssize_t
|
2011-09-28 13:41:54 +08:00
|
|
|
ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
|
2008-05-08 22:29:10 +08:00
|
|
|
{
|
2009-05-02 20:36:44 +08:00
|
|
|
/* Escape unicode code point c to ASCII escape sequences
|
|
|
|
in char *output. output must have at least 12 bytes unused to
|
|
|
|
accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
|
2008-05-08 22:29:10 +08:00
|
|
|
output[chars++] = '\\';
|
|
|
|
switch (c) {
|
2009-05-02 20:36:44 +08:00
|
|
|
case '\\': output[chars++] = c; break;
|
|
|
|
case '"': output[chars++] = c; break;
|
2008-05-08 22:29:10 +08:00
|
|
|
case '\b': output[chars++] = 'b'; break;
|
|
|
|
case '\f': output[chars++] = 'f'; break;
|
|
|
|
case '\n': output[chars++] = 'n'; break;
|
|
|
|
case '\r': output[chars++] = 'r'; break;
|
|
|
|
case '\t': output[chars++] = 't'; break;
|
|
|
|
default:
|
|
|
|
if (c >= 0x10000) {
|
|
|
|
/* UTF-16 surrogate pair */
|
2012-10-30 08:42:39 +08:00
|
|
|
Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
|
2008-05-08 22:29:10 +08:00
|
|
|
output[chars++] = 'u';
|
2012-10-30 08:42:39 +08:00
|
|
|
output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
|
|
|
|
output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
|
|
|
|
output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
|
|
|
|
output[chars++] = Py_hexdigits[(v ) & 0xf];
|
|
|
|
c = Py_UNICODE_LOW_SURROGATE(c);
|
2008-05-08 22:29:10 +08:00
|
|
|
output[chars++] = '\\';
|
|
|
|
}
|
|
|
|
output[chars++] = 'u';
|
2011-10-14 08:13:11 +08:00
|
|
|
output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
|
|
|
|
output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
|
|
|
|
output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
|
|
|
|
output[chars++] = Py_hexdigits[(c ) & 0xf];
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
|
|
|
return chars;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
    /* Take a PyUnicode pystr and return a new PyUnicode holding its
       ASCII-only JSON representation, including the surrounding double
       quotes.  Returns NULL with an exception set on failure
       (OverflowError if the escaped length does not fit in Py_ssize_t). */
    Py_ssize_t idx;
    Py_ssize_t length;
    Py_ssize_t needed;
    Py_ssize_t written;
    PyObject *result;
    void *data;
    unsigned char *out;
    int kind;

    if (PyUnicode_READY(pystr) == -1) {
        return NULL;
    }

    kind = PyUnicode_KIND(pystr);
    data = PyUnicode_DATA(pystr);
    length = PyUnicode_GET_LENGTH(pystr);

    /* First pass: work out how many output characters are required.
       The count starts at 2 for the opening and closing quotes. */
    needed = 2;
    for (idx = 0; idx < length; idx++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, idx);
        Py_ssize_t width;

        if (S_CHAR(ch)) {
            width = 1;
        }
        else if (ch == '\\' || ch == '"' || ch == '\b' || ch == '\f' ||
                 ch == '\n' || ch == '\r' || ch == '\t') {
            width = 2;
        }
        else if (ch >= 0x10000) {
            width = 12;     /* surrogate pair: \uXXXX\uXXXX */
        }
        else {
            width = 6;      /* \uXXXX */
        }

        if (needed > PY_SSIZE_T_MAX - width) {
            PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
            return NULL;
        }
        needed += width;
    }

    /* Max char 127: the output is pure ASCII. */
    result = PyUnicode_New(needed, 127);
    if (result == NULL) {
        return NULL;
    }
    out = PyUnicode_1BYTE_DATA(result);

    /* Second pass: emit the characters. */
    written = 0;
    out[written++] = '"';
    for (idx = 0; idx < length; idx++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, idx);
        if (!S_CHAR(ch)) {
            written = ascii_escape_unichar(ch, out, written);
        }
        else {
            out[written++] = ch;
        }
    }
    out[written++] = '"';
#ifdef Py_DEBUG
    assert(_PyUnicode_CheckConsistency(result, 1));
#endif
    return result;
}
|
|
|
|
|
2015-01-11 23:41:01 +08:00
|
|
|
static PyObject *
escape_unicode(PyObject *pystr)
{
    /* Take a PyUnicode pystr and return a new PyUnicode holding its JSON
       representation, including the surrounding double quotes.  Only
       backslash, double quote and code points <= 0x1f are escaped; every
       other character is copied through unchanged.  Returns NULL with an
       exception set on failure. */
    Py_ssize_t pos;
    Py_ssize_t length;
    Py_ssize_t needed;
    Py_ssize_t written;
    PyObject *result;
    void *data;
    int kind;
    Py_UCS4 maxchar;

    if (PyUnicode_READY(pystr) == -1) {
        return NULL;
    }

    maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
    length = PyUnicode_GET_LENGTH(pystr);
    data = PyUnicode_DATA(pystr);
    kind = PyUnicode_KIND(pystr);

    /* First pass: compute the output size, starting at 2 for the quotes. */
    needed = 2;
    for (pos = 0; pos < length; pos++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
        Py_ssize_t width;

        if (ch == '\\' || ch == '"' || ch == '\b' || ch == '\f' ||
            ch == '\n' || ch == '\r' || ch == '\t') {
            width = 2;
        }
        else if (ch <= 0x1f) {
            width = 6;      /* \u00XX */
        }
        else {
            width = 1;
        }

        if (needed > PY_SSIZE_T_MAX - width) {
            PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
            return NULL;
        }
        needed += width;
    }

    result = PyUnicode_New(needed, maxchar);
    if (result == NULL) {
        return NULL;
    }

    /* From here on kind is the kind of the result, and the macro below
       also uses it to read the input.
       NOTE(review): this relies on the result having the same kind as
       pystr because it was created with pystr's max char value -- confirm. */
    kind = PyUnicode_KIND(result);

    /* Second pass, written as a macro so the same body can be expanded for
       each output element type. */
#define ESCAPE_INTO(out) do { \
        written = 0; \
        (out)[written++] = '"'; \
        for (pos = 0; pos < length; pos++) { \
            Py_UCS4 ch = PyUnicode_READ(kind, data, pos); \
            Py_UCS4 esc = 0; \
            switch (ch) { \
                case '\\': case '"': esc = ch; break; \
                case '\b': esc = 'b'; break; \
                case '\f': esc = 'f'; break; \
                case '\n': esc = 'n'; break; \
                case '\r': esc = 'r'; break; \
                case '\t': esc = 't'; break; \
                default: break; \
            } \
            if (esc != 0) { \
                (out)[written++] = '\\'; \
                (out)[written++] = esc; \
            } \
            else if (ch <= 0x1f) { \
                (out)[written++] = '\\'; \
                (out)[written++] = 'u'; \
                (out)[written++] = '0'; \
                (out)[written++] = '0'; \
                (out)[written++] = Py_hexdigits[(ch >> 4) & 0xf]; \
                (out)[written++] = Py_hexdigits[(ch     ) & 0xf]; \
            } \
            else { \
                (out)[written++] = ch; \
            } \
        } \
        (out)[written++] = '"'; \
    } while (0)

    switch (kind) {
    case PyUnicode_1BYTE_KIND: {
        Py_UCS1 *out1 = PyUnicode_1BYTE_DATA(result);
        ESCAPE_INTO(out1);
        break;
    }
    case PyUnicode_2BYTE_KIND: {
        Py_UCS2 *out2 = PyUnicode_2BYTE_DATA(result);
        ESCAPE_INTO(out2);
        break;
    }
    default: {
        Py_UCS4 *out4 = PyUnicode_4BYTE_DATA(result);
        assert(kind == PyUnicode_4BYTE_KIND);
        ESCAPE_INTO(out4);
        break;
    }
    }
#undef ESCAPE_INTO

#ifdef Py_DEBUG
    assert(_PyUnicode_CheckConsistency(result, 1));
#endif
    return result;
}
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
static void
|
2015-12-26 02:01:53 +08:00
|
|
|
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
|
2008-05-08 22:29:10 +08:00
|
|
|
{
|
2015-01-26 19:16:30 +08:00
|
|
|
/* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
|
|
|
|
static PyObject *JSONDecodeError = NULL;
|
|
|
|
PyObject *exc;
|
|
|
|
if (JSONDecodeError == NULL) {
|
2008-05-08 22:29:10 +08:00
|
|
|
PyObject *decoder = PyImport_ImportModule("json.decoder");
|
|
|
|
if (decoder == NULL)
|
|
|
|
return;
|
2015-01-26 19:16:30 +08:00
|
|
|
JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(decoder);
|
2015-01-26 19:16:30 +08:00
|
|
|
if (JSONDecodeError == NULL)
|
2008-05-08 22:29:10 +08:00
|
|
|
return;
|
|
|
|
}
|
2016-12-09 07:33:39 +08:00
|
|
|
exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
|
2015-01-26 19:16:30 +08:00
|
|
|
if (exc) {
|
|
|
|
PyErr_SetObject(JSONDecodeError, exc);
|
|
|
|
Py_DECREF(exc);
|
2008-10-17 05:17:24 +08:00
|
|
|
}
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
|
|
|
|
2013-01-03 14:44:15 +08:00
|
|
|
static void
|
|
|
|
raise_stop_iteration(Py_ssize_t idx)
|
|
|
|
{
|
|
|
|
PyObject *value = PyLong_FromSsize_t(idx);
|
|
|
|
if (value != NULL) {
|
|
|
|
PyErr_SetObject(PyExc_StopIteration, value);
|
|
|
|
Py_DECREF(value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-05-08 22:29:10 +08:00
|
|
|
static PyObject *
|
2009-05-02 20:36:44 +08:00
|
|
|
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
|
|
|
|
/* return (rval, idx) tuple, stealing reference to rval */
|
|
|
|
PyObject *tpl;
|
|
|
|
PyObject *pyidx;
|
|
|
|
/*
|
|
|
|
steal a reference to rval, returns (rval, idx)
|
|
|
|
*/
|
|
|
|
if (rval == NULL) {
|
|
|
|
return NULL;
|
2008-07-20 06:26:35 +08:00
|
|
|
}
|
2009-05-02 20:36:44 +08:00
|
|
|
pyidx = PyLong_FromSsize_t(idx);
|
|
|
|
if (pyidx == NULL) {
|
|
|
|
Py_DECREF(rval);
|
|
|
|
return NULL;
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
2009-05-02 20:36:44 +08:00
|
|
|
tpl = PyTuple_New(2);
|
|
|
|
if (tpl == NULL) {
|
|
|
|
Py_DECREF(pyidx);
|
|
|
|
Py_DECREF(rval);
|
|
|
|
return NULL;
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
2009-05-02 20:36:44 +08:00
|
|
|
PyTuple_SET_ITEM(tpl, 0, rval);
|
|
|
|
PyTuple_SET_ITEM(tpl, 1, pyidx);
|
|
|
|
return tpl;
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
|
2009-05-02 20:36:44 +08:00
|
|
|
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
|
2008-05-08 22:29:10 +08:00
|
|
|
{
|
2009-05-02 20:36:44 +08:00
|
|
|
/* Read the JSON string from PyUnicode pystr.
|
|
|
|
end is the index of the first character after the quote.
|
|
|
|
if strict is zero then literal control characters are allowed
|
|
|
|
*next_end_ptr is a return-by-reference index of the character
|
|
|
|
after the end quote
|
|
|
|
|
|
|
|
Return value is a new PyUnicode
|
|
|
|
*/
|
2010-09-05 04:16:53 +08:00
|
|
|
PyObject *rval = NULL;
|
2011-09-28 13:41:54 +08:00
|
|
|
Py_ssize_t len;
|
2008-05-08 22:29:10 +08:00
|
|
|
Py_ssize_t begin = end - 1;
|
2011-02-23 04:15:44 +08:00
|
|
|
Py_ssize_t next /* = begin */;
|
2011-09-28 13:41:54 +08:00
|
|
|
const void *buf;
|
|
|
|
int kind;
|
2010-09-05 04:16:53 +08:00
|
|
|
|
2011-09-28 13:41:54 +08:00
|
|
|
if (PyUnicode_READY(pystr) == -1)
|
|
|
|
return 0;
|
|
|
|
|
2019-10-17 15:12:41 +08:00
|
|
|
_PyUnicodeWriter writer;
|
|
|
|
_PyUnicodeWriter_Init(&writer);
|
|
|
|
writer.overallocate = 1;
|
|
|
|
|
2011-09-28 13:41:54 +08:00
|
|
|
len = PyUnicode_GET_LENGTH(pystr);
|
|
|
|
buf = PyUnicode_DATA(pystr);
|
|
|
|
kind = PyUnicode_KIND(pystr);
|
|
|
|
|
2013-01-03 14:44:15 +08:00
|
|
|
if (end < 0 || len < end) {
|
2008-07-20 06:26:35 +08:00
|
|
|
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
|
|
|
|
goto bail;
|
|
|
|
}
|
2008-05-08 22:29:10 +08:00
|
|
|
while (1) {
|
|
|
|
/* Find the end of the string or the next escape */
|
2019-08-08 16:57:10 +08:00
|
|
|
Py_UCS4 c;
|
|
|
|
{
|
|
|
|
// Use tight scope variable to help register allocation.
|
|
|
|
Py_UCS4 d = 0;
|
|
|
|
for (next = end; next < len; next++) {
|
|
|
|
d = PyUnicode_READ(kind, buf, next);
|
|
|
|
if (d == '"' || d == '\\') {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (d <= 0x1f && strict) {
|
|
|
|
raise_errmsg("Invalid control character at", pystr, next);
|
|
|
|
goto bail;
|
|
|
|
}
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
2019-08-08 16:57:10 +08:00
|
|
|
c = d;
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
2019-10-17 15:12:41 +08:00
|
|
|
|
|
|
|
if (c == '"') {
|
|
|
|
// Fast path for simple case.
|
|
|
|
if (writer.buffer == NULL) {
|
|
|
|
PyObject *ret = PyUnicode_Substring(pystr, end, next);
|
|
|
|
if (ret == NULL) {
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
*next_end_ptr = next + 1;;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (c != '\\') {
|
2008-05-08 22:29:10 +08:00
|
|
|
raise_errmsg("Unterminated string starting at", pystr, begin);
|
|
|
|
goto bail;
|
|
|
|
}
|
2019-10-17 15:12:41 +08:00
|
|
|
|
2008-05-08 22:29:10 +08:00
|
|
|
/* Pick up this chunk if it's not zero length */
|
|
|
|
if (next != end) {
|
2019-10-17 15:12:41 +08:00
|
|
|
if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
|
2008-05-08 22:29:10 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
next++;
|
|
|
|
if (c == '"') {
|
|
|
|
end = next;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (next == len) {
|
|
|
|
raise_errmsg("Unterminated string starting at", pystr, begin);
|
|
|
|
goto bail;
|
|
|
|
}
|
2011-09-28 13:41:54 +08:00
|
|
|
c = PyUnicode_READ(kind, buf, next);
|
2008-05-08 22:29:10 +08:00
|
|
|
if (c != 'u') {
|
|
|
|
/* Non-unicode backslash escapes */
|
|
|
|
end = next + 1;
|
|
|
|
switch (c) {
|
|
|
|
case '"': break;
|
|
|
|
case '\\': break;
|
|
|
|
case '/': break;
|
|
|
|
case 'b': c = '\b'; break;
|
|
|
|
case 'f': c = '\f'; break;
|
|
|
|
case 'n': c = '\n'; break;
|
|
|
|
case 'r': c = '\r'; break;
|
|
|
|
case 't': c = '\t'; break;
|
|
|
|
default: c = 0;
|
|
|
|
}
|
|
|
|
if (c == 0) {
|
|
|
|
raise_errmsg("Invalid \\escape", pystr, end - 2);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
c = 0;
|
|
|
|
next++;
|
|
|
|
end = next + 4;
|
|
|
|
if (end >= len) {
|
|
|
|
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
/* Decode 4 hex digits */
|
|
|
|
for (; next < end; next++) {
|
2011-09-28 13:41:54 +08:00
|
|
|
Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
|
2009-05-02 20:36:44 +08:00
|
|
|
c <<= 4;
|
2008-05-08 22:29:10 +08:00
|
|
|
switch (digit) {
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
2009-05-02 20:36:44 +08:00
|
|
|
c |= (digit - '0'); break;
|
2008-05-08 22:29:10 +08:00
|
|
|
case 'a': case 'b': case 'c': case 'd': case 'e':
|
|
|
|
case 'f':
|
2009-05-02 20:36:44 +08:00
|
|
|
c |= (digit - 'a' + 10); break;
|
2008-05-08 22:29:10 +08:00
|
|
|
case 'A': case 'B': case 'C': case 'D': case 'E':
|
|
|
|
case 'F':
|
2009-05-02 20:36:44 +08:00
|
|
|
c |= (digit - 'A' + 10); break;
|
2008-05-08 22:29:10 +08:00
|
|
|
default:
|
|
|
|
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Surrogate pair */
|
2013-11-27 03:25:28 +08:00
|
|
|
if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
|
|
|
|
PyUnicode_READ(kind, buf, next++) == '\\' &&
|
|
|
|
PyUnicode_READ(kind, buf, next++) == 'u') {
|
2011-09-28 13:41:54 +08:00
|
|
|
Py_UCS4 c2 = 0;
|
2008-05-08 22:29:10 +08:00
|
|
|
end += 6;
|
|
|
|
/* Decode 4 hex digits */
|
|
|
|
for (; next < end; next++) {
|
2011-09-28 13:41:54 +08:00
|
|
|
Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
|
2010-10-09 23:24:28 +08:00
|
|
|
c2 <<= 4;
|
2008-05-08 22:29:10 +08:00
|
|
|
switch (digit) {
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
2009-05-02 20:36:44 +08:00
|
|
|
c2 |= (digit - '0'); break;
|
2008-05-08 22:29:10 +08:00
|
|
|
case 'a': case 'b': case 'c': case 'd': case 'e':
|
|
|
|
case 'f':
|
2009-05-02 20:36:44 +08:00
|
|
|
c2 |= (digit - 'a' + 10); break;
|
2008-05-08 22:29:10 +08:00
|
|
|
case 'A': case 'B': case 'C': case 'D': case 'E':
|
|
|
|
case 'F':
|
2009-05-02 20:36:44 +08:00
|
|
|
c2 |= (digit - 'A' + 10); break;
|
2008-05-08 22:29:10 +08:00
|
|
|
default:
|
|
|
|
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
}
|
2013-11-27 03:25:28 +08:00
|
|
|
if (Py_UNICODE_IS_LOW_SURROGATE(c2))
|
|
|
|
c = Py_UNICODE_JOIN_SURROGATES(c, c2);
|
|
|
|
else
|
|
|
|
end -= 6;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
2019-10-17 15:12:41 +08:00
|
|
|
if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
|
2008-05-08 22:29:10 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
2010-09-05 04:16:53 +08:00
|
|
|
}
|
|
|
|
|
2019-10-17 15:12:41 +08:00
|
|
|
rval = _PyUnicodeWriter_Finish(&writer);
|
2009-05-02 20:36:44 +08:00
|
|
|
*next_end_ptr = end;
|
|
|
|
return rval;
|
2019-10-17 15:12:41 +08:00
|
|
|
|
2008-05-08 22:29:10 +08:00
|
|
|
bail:
|
2009-05-02 20:36:44 +08:00
|
|
|
*next_end_ptr = -1;
|
2019-10-17 15:12:41 +08:00
|
|
|
_PyUnicodeWriter_Dealloc(&writer);
|
2008-05-08 22:29:10 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
PyDoc_STRVAR(pydoc_scanstring,
|
2010-08-03 04:16:18 +08:00
|
|
|
"scanstring(string, end, strict=True) -> (string, end)\n"
|
2009-05-02 20:36:44 +08:00
|
|
|
"\n"
|
|
|
|
"Scan the string s for a JSON string. End is the index of the\n"
|
|
|
|
"character in s after the quote that started the JSON string.\n"
|
|
|
|
"Unescapes all valid JSON string escape sequences and raises ValueError\n"
|
|
|
|
"on attempt to decode an invalid string. If strict is False then literal\n"
|
|
|
|
"control characters are allowed in the string.\n"
|
|
|
|
"\n"
|
|
|
|
"Returns a tuple of the decoded string and the index of the character in s\n"
|
|
|
|
"after the end quote."
|
|
|
|
);
|
2008-05-08 22:29:10 +08:00
|
|
|
|
|
|
|
static PyObject *
|
2019-11-05 18:44:28 +08:00
|
|
|
py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
|
2008-05-08 22:29:10 +08:00
|
|
|
{
|
|
|
|
PyObject *pystr;
|
2009-05-02 20:36:44 +08:00
|
|
|
PyObject *rval;
|
2008-05-08 22:29:10 +08:00
|
|
|
Py_ssize_t end;
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_ssize_t next_end = -1;
|
|
|
|
int strict = 1;
|
2012-12-02 02:34:16 +08:00
|
|
|
if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
|
2008-05-08 22:29:10 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
2009-05-02 20:36:44 +08:00
|
|
|
if (PyUnicode_Check(pystr)) {
|
|
|
|
rval = scanstring_unicode(pystr, end, strict, &next_end);
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
|
|
|
else {
|
2010-05-09 23:52:27 +08:00
|
|
|
PyErr_Format(PyExc_TypeError,
|
2010-08-03 04:16:18 +08:00
|
|
|
"first argument must be a string, not %.80s",
|
2008-05-08 22:29:10 +08:00
|
|
|
Py_TYPE(pystr)->tp_name);
|
|
|
|
return NULL;
|
|
|
|
}
|
2009-05-02 20:36:44 +08:00
|
|
|
return _build_rval_index_tuple(rval, next_end);
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
|
2010-08-03 04:16:18 +08:00
|
|
|
"encode_basestring_ascii(string) -> string\n"
|
2009-05-02 20:36:44 +08:00
|
|
|
"\n"
|
|
|
|
"Return an ASCII-only JSON representation of a Python string"
|
|
|
|
);
|
2008-05-08 22:29:10 +08:00
|
|
|
|
|
|
|
static PyObject *
|
2019-11-05 18:44:28 +08:00
|
|
|
py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
|
2008-05-08 22:29:10 +08:00
|
|
|
{
|
|
|
|
PyObject *rval;
|
2009-05-02 20:36:44 +08:00
|
|
|
/* Return an ASCII-only JSON representation of a Python string */
|
2008-05-08 22:29:10 +08:00
|
|
|
/* METH_O */
|
2009-05-02 20:36:44 +08:00
|
|
|
if (PyUnicode_Check(pystr)) {
|
2008-05-08 22:29:10 +08:00
|
|
|
rval = ascii_escape_unicode(pystr);
|
|
|
|
}
|
|
|
|
else {
|
2009-05-02 20:36:44 +08:00
|
|
|
PyErr_Format(PyExc_TypeError,
|
|
|
|
"first argument must be a string, not %.80s",
|
2008-05-08 22:29:10 +08:00
|
|
|
Py_TYPE(pystr)->tp_name);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return rval;
|
|
|
|
}
|
|
|
|
|
2015-01-11 23:41:01 +08:00
|
|
|
|
|
|
|
PyDoc_STRVAR(pydoc_encode_basestring,
|
|
|
|
"encode_basestring(string) -> string\n"
|
|
|
|
"\n"
|
|
|
|
"Return a JSON representation of a Python string"
|
|
|
|
);
|
|
|
|
|
|
|
|
static PyObject *
|
2019-11-05 18:44:28 +08:00
|
|
|
py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
|
2015-01-11 23:41:01 +08:00
|
|
|
{
|
|
|
|
PyObject *rval;
|
|
|
|
/* Return a JSON representation of a Python string */
|
|
|
|
/* METH_O */
|
|
|
|
if (PyUnicode_Check(pystr)) {
|
|
|
|
rval = escape_unicode(pystr);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
PyErr_Format(PyExc_TypeError,
|
|
|
|
"first argument must be a string, not %.80s",
|
|
|
|
Py_TYPE(pystr)->tp_name);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return rval;
|
|
|
|
}
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
static void
|
|
|
|
scanner_dealloc(PyObject *self)
|
|
|
|
{
|
2017-08-24 13:55:17 +08:00
|
|
|
/* bpo-31095: UnTrack is needed before calling any callbacks */
|
|
|
|
PyObject_GC_UnTrack(self);
|
2009-05-02 20:36:44 +08:00
|
|
|
scanner_clear(self);
|
|
|
|
Py_TYPE(self)->tp_free(self);
|
|
|
|
}
|
2008-05-08 22:29:10 +08:00
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
static int
|
|
|
|
scanner_traverse(PyObject *self, visitproc visit, void *arg)
|
|
|
|
{
|
|
|
|
PyScannerObject *s;
|
|
|
|
assert(PyScanner_Check(self));
|
|
|
|
s = (PyScannerObject *)self;
|
|
|
|
Py_VISIT(s->object_hook);
|
|
|
|
Py_VISIT(s->object_pairs_hook);
|
|
|
|
Py_VISIT(s->parse_float);
|
|
|
|
Py_VISIT(s->parse_int);
|
|
|
|
Py_VISIT(s->parse_constant);
|
|
|
|
return 0;
|
|
|
|
}
|
2008-06-11 13:26:20 +08:00
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
static int
|
|
|
|
scanner_clear(PyObject *self)
|
2008-05-08 22:29:10 +08:00
|
|
|
{
|
2009-05-02 20:36:44 +08:00
|
|
|
PyScannerObject *s;
|
|
|
|
assert(PyScanner_Check(self));
|
|
|
|
s = (PyScannerObject *)self;
|
|
|
|
Py_CLEAR(s->object_hook);
|
|
|
|
Py_CLEAR(s->object_pairs_hook);
|
|
|
|
Py_CLEAR(s->parse_float);
|
|
|
|
Py_CLEAR(s->parse_int);
|
|
|
|
Py_CLEAR(s->parse_constant);
|
2010-09-05 04:16:53 +08:00
|
|
|
Py_CLEAR(s->memo);
|
2009-05-02 20:36:44 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
|
2017-05-28 20:31:49 +08:00
|
|
|
_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
|
|
|
|
{
|
2009-05-02 20:36:44 +08:00
|
|
|
/* Read a JSON object from PyUnicode pystr.
|
|
|
|
idx is the index of the first character after the opening curly brace.
|
|
|
|
*next_idx_ptr is a return-by-reference index to the first character after
|
|
|
|
the closing curly brace.
|
|
|
|
|
|
|
|
Returns a new PyObject (usually a dict, but object_hook can change that)
|
|
|
|
*/
|
2011-09-28 13:41:54 +08:00
|
|
|
void *str;
|
|
|
|
int kind;
|
|
|
|
Py_ssize_t end_idx;
|
2009-05-02 20:36:44 +08:00
|
|
|
PyObject *val = NULL;
|
2010-09-05 04:16:53 +08:00
|
|
|
PyObject *rval = NULL;
|
2009-05-02 20:36:44 +08:00
|
|
|
PyObject *key = NULL;
|
2010-09-05 04:16:53 +08:00
|
|
|
int has_pairs_hook = (s->object_pairs_hook != Py_None);
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_ssize_t next_idx;
|
2010-09-05 04:16:53 +08:00
|
|
|
|
2011-09-28 13:41:54 +08:00
|
|
|
if (PyUnicode_READY(pystr) == -1)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
str = PyUnicode_DATA(pystr);
|
|
|
|
kind = PyUnicode_KIND(pystr);
|
|
|
|
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
|
|
|
|
|
2010-09-05 04:16:53 +08:00
|
|
|
if (has_pairs_hook)
|
|
|
|
rval = PyList_New(0);
|
|
|
|
else
|
|
|
|
rval = PyDict_New();
|
2009-05-02 20:36:44 +08:00
|
|
|
if (rval == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* skip whitespace after { */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* only loop if the object is non-empty */
|
2013-01-03 14:44:15 +08:00
|
|
|
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
|
|
|
|
while (1) {
|
2010-09-05 04:16:53 +08:00
|
|
|
PyObject *memokey;
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
/* read key */
|
2013-01-03 14:44:15 +08:00
|
|
|
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
|
2012-06-29 07:58:26 +08:00
|
|
|
raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
2017-05-28 20:31:49 +08:00
|
|
|
key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
|
2009-05-02 20:36:44 +08:00
|
|
|
if (key == NULL)
|
|
|
|
goto bail;
|
2019-08-08 16:57:10 +08:00
|
|
|
memokey = PyDict_SetDefault(s->memo, key, key);
|
|
|
|
if (memokey == NULL) {
|
2019-02-25 23:59:46 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
2019-08-08 16:57:10 +08:00
|
|
|
Py_INCREF(memokey);
|
|
|
|
Py_DECREF(key);
|
|
|
|
key = memokey;
|
2009-05-02 20:36:44 +08:00
|
|
|
idx = next_idx;
|
|
|
|
|
|
|
|
/* skip whitespace between key and : delimiter, read :, skip whitespace */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
|
|
|
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
|
2012-06-29 07:58:26 +08:00
|
|
|
raise_errmsg("Expecting ':' delimiter", pystr, idx);
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
idx++;
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* read any JSON term */
|
|
|
|
val = scan_once_unicode(s, pystr, idx, &next_idx);
|
|
|
|
if (val == NULL)
|
|
|
|
goto bail;
|
|
|
|
|
2010-09-05 04:16:53 +08:00
|
|
|
if (has_pairs_hook) {
|
|
|
|
PyObject *item = PyTuple_Pack(2, key, val);
|
|
|
|
if (item == NULL)
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
2010-09-05 04:16:53 +08:00
|
|
|
Py_CLEAR(key);
|
|
|
|
Py_CLEAR(val);
|
|
|
|
if (PyList_Append(rval, item) == -1) {
|
|
|
|
Py_DECREF(item);
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
2010-09-05 04:16:53 +08:00
|
|
|
Py_DECREF(item);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (PyDict_SetItem(rval, key, val) < 0)
|
|
|
|
goto bail;
|
|
|
|
Py_CLEAR(key);
|
|
|
|
Py_CLEAR(val);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
idx = next_idx;
|
|
|
|
|
|
|
|
/* skip whitespace before } or , */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* bail if the object is closed or we didn't get the , delimiter */
|
2013-01-03 14:44:15 +08:00
|
|
|
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
|
2009-05-02 20:36:44 +08:00
|
|
|
break;
|
2013-01-03 14:44:15 +08:00
|
|
|
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
|
2012-06-29 07:58:26 +08:00
|
|
|
raise_errmsg("Expecting ',' delimiter", pystr, idx);
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
idx++;
|
|
|
|
|
|
|
|
/* skip whitespace after , delimiter */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*next_idx_ptr = idx + 1;
|
|
|
|
|
2010-09-05 04:16:53 +08:00
|
|
|
if (has_pairs_hook) {
|
2019-07-04 18:31:34 +08:00
|
|
|
val = _PyObject_CallOneArg(s->object_pairs_hook, rval);
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(rval);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if object_hook is not None: rval = object_hook(rval) */
|
|
|
|
if (s->object_hook != Py_None) {
|
2019-07-04 18:31:34 +08:00
|
|
|
val = _PyObject_CallOneArg(s->object_hook, rval);
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(rval);
|
2010-09-05 04:16:53 +08:00
|
|
|
return val;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
return rval;
|
|
|
|
bail:
|
|
|
|
Py_XDECREF(key);
|
|
|
|
Py_XDECREF(val);
|
2010-09-05 04:16:53 +08:00
|
|
|
Py_XDECREF(rval);
|
2009-05-02 20:36:44 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
|
2015-02-17 16:14:30 +08:00
|
|
|
/* Read a JSON array from PyUnicode pystr.
|
2009-05-02 20:36:44 +08:00
|
|
|
idx is the index of the first character after the opening brace.
|
|
|
|
*next_idx_ptr is a return-by-reference index to the first character after
|
|
|
|
the closing brace.
|
|
|
|
|
|
|
|
Returns a new PyList
|
|
|
|
*/
|
2011-09-28 13:41:54 +08:00
|
|
|
void *str;
|
|
|
|
int kind;
|
|
|
|
Py_ssize_t end_idx;
|
2009-05-02 20:36:44 +08:00
|
|
|
PyObject *val = NULL;
|
2017-01-03 17:17:44 +08:00
|
|
|
PyObject *rval;
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_ssize_t next_idx;
|
|
|
|
|
2011-09-28 13:41:54 +08:00
|
|
|
if (PyUnicode_READY(pystr) == -1)
|
|
|
|
return NULL;
|
|
|
|
|
2017-01-03 17:17:44 +08:00
|
|
|
rval = PyList_New(0);
|
|
|
|
if (rval == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
2011-09-28 13:41:54 +08:00
|
|
|
str = PyUnicode_DATA(pystr);
|
|
|
|
kind = PyUnicode_KIND(pystr);
|
|
|
|
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
/* skip whitespace after [ */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* only loop if the array is non-empty */
|
2013-01-03 14:44:15 +08:00
|
|
|
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
|
|
|
|
while (1) {
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* read any JSON term */
|
|
|
|
val = scan_once_unicode(s, pystr, idx, &next_idx);
|
|
|
|
if (val == NULL)
|
|
|
|
goto bail;
|
|
|
|
|
|
|
|
if (PyList_Append(rval, val) == -1)
|
|
|
|
goto bail;
|
|
|
|
|
|
|
|
Py_CLEAR(val);
|
|
|
|
idx = next_idx;
|
|
|
|
|
|
|
|
/* skip whitespace between term and , */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* bail if the array is closed or we didn't get the , delimiter */
|
2013-01-03 14:44:15 +08:00
|
|
|
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
|
2009-05-02 20:36:44 +08:00
|
|
|
break;
|
2013-01-03 14:44:15 +08:00
|
|
|
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
|
2012-06-29 07:58:26 +08:00
|
|
|
raise_errmsg("Expecting ',' delimiter", pystr, idx);
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
idx++;
|
|
|
|
|
|
|
|
/* skip whitespace after , */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-09-28 13:41:54 +08:00
|
|
|
/* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
|
|
|
|
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
|
2013-01-03 14:44:15 +08:00
|
|
|
raise_errmsg("Expecting value", pystr, end_idx);
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
*next_idx_ptr = idx + 1;
|
|
|
|
return rval;
|
|
|
|
bail:
|
|
|
|
Py_XDECREF(val);
|
|
|
|
Py_DECREF(rval);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
|
2015-02-17 16:14:30 +08:00
|
|
|
_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
|
|
|
|
/* Read a JSON constant.
|
2009-05-02 20:36:44 +08:00
|
|
|
constant is the constant string that was found
|
|
|
|
("NaN", "Infinity", "-Infinity").
|
|
|
|
idx is the index of the first character of the constant
|
|
|
|
*next_idx_ptr is a return-by-reference index to the first character after
|
|
|
|
the constant.
|
|
|
|
|
|
|
|
Returns the result of parse_constant
|
|
|
|
*/
|
|
|
|
PyObject *cstr;
|
|
|
|
PyObject *rval;
|
|
|
|
/* constant is "NaN", "Infinity", or "-Infinity" */
|
|
|
|
cstr = PyUnicode_InternFromString(constant);
|
|
|
|
if (cstr == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* rval = parse_constant(constant) */
|
2019-07-04 18:31:34 +08:00
|
|
|
rval = _PyObject_CallOneArg(s->parse_constant, cstr);
|
2011-10-12 04:11:42 +08:00
|
|
|
idx += PyUnicode_GET_LENGTH(cstr);
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(cstr);
|
|
|
|
*next_idx_ptr = idx;
|
|
|
|
return rval;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
|
|
|
|
/* Read a JSON number from PyUnicode pystr.
|
|
|
|
idx is the index of the first character of the number
|
|
|
|
*next_idx_ptr is a return-by-reference index to the first character after
|
|
|
|
the number.
|
|
|
|
|
|
|
|
Returns a new PyObject representation of that number:
|
2015-02-17 16:14:30 +08:00
|
|
|
PyLong, or PyFloat.
|
2009-05-02 20:36:44 +08:00
|
|
|
May return other types if parse_int or parse_float are set
|
|
|
|
*/
|
2011-09-28 13:41:54 +08:00
|
|
|
void *str;
|
|
|
|
int kind;
|
|
|
|
Py_ssize_t end_idx;
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_ssize_t idx = start;
|
|
|
|
int is_float = 0;
|
|
|
|
PyObject *rval;
|
2011-04-26 01:16:06 +08:00
|
|
|
PyObject *numstr = NULL;
|
|
|
|
PyObject *custom_func;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
2011-09-28 13:41:54 +08:00
|
|
|
if (PyUnicode_READY(pystr) == -1)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
str = PyUnicode_DATA(pystr);
|
|
|
|
kind = PyUnicode_KIND(pystr);
|
|
|
|
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
/* read a sign if it's there, make sure it's not the end of the string */
|
2011-09-28 13:41:54 +08:00
|
|
|
if (PyUnicode_READ(kind, str, idx) == '-') {
|
2009-05-02 20:36:44 +08:00
|
|
|
idx++;
|
|
|
|
if (idx > end_idx) {
|
2013-01-03 14:44:15 +08:00
|
|
|
raise_stop_iteration(start);
|
2009-05-02 20:36:44 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* read as many integer digits as we find as long as it doesn't start with 0 */
|
2011-09-28 13:41:54 +08:00
|
|
|
if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
|
2009-05-02 20:36:44 +08:00
|
|
|
idx++;
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
/* if it starts with 0 we only expect one integer digit */
|
2011-09-28 13:41:54 +08:00
|
|
|
else if (PyUnicode_READ(kind, str, idx) == '0') {
|
2009-05-02 20:36:44 +08:00
|
|
|
idx++;
|
|
|
|
}
|
|
|
|
/* no integer digits, error */
|
|
|
|
else {
|
2013-01-03 14:44:15 +08:00
|
|
|
raise_stop_iteration(start);
|
2009-05-02 20:36:44 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if the next char is '.' followed by a digit then read all float digits */
|
2011-09-28 13:41:54 +08:00
|
|
|
if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
|
2009-05-02 20:36:44 +08:00
|
|
|
is_float = 1;
|
|
|
|
idx += 2;
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
|
2011-09-28 13:41:54 +08:00
|
|
|
if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_ssize_t e_start = idx;
|
|
|
|
idx++;
|
|
|
|
|
|
|
|
/* read an exponent sign if present */
|
2011-09-28 13:41:54 +08:00
|
|
|
if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* read all digits */
|
2011-09-28 13:41:54 +08:00
|
|
|
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
/* if we got a digit, then parse as float. if not, backtrack */
|
2011-09-28 13:41:54 +08:00
|
|
|
if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
|
2009-05-02 20:36:44 +08:00
|
|
|
is_float = 1;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
idx = e_start;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-04-26 01:16:06 +08:00
|
|
|
if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
|
|
|
|
custom_func = s->parse_float;
|
|
|
|
else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
|
|
|
|
custom_func = s->parse_int;
|
|
|
|
else
|
|
|
|
custom_func = NULL;
|
|
|
|
|
|
|
|
if (custom_func) {
|
|
|
|
/* copy the section we determined to be a number */
|
2011-09-28 13:41:54 +08:00
|
|
|
numstr = PyUnicode_FromKindAndData(kind,
|
2011-10-08 02:55:35 +08:00
|
|
|
(char*)str + kind * start,
|
2011-09-28 13:41:54 +08:00
|
|
|
idx - start);
|
2011-04-26 01:16:06 +08:00
|
|
|
if (numstr == NULL)
|
|
|
|
return NULL;
|
2019-07-04 18:31:34 +08:00
|
|
|
rval = _PyObject_CallOneArg(custom_func, numstr);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
else {
|
2011-04-26 01:16:06 +08:00
|
|
|
Py_ssize_t i, n;
|
|
|
|
char *buf;
|
|
|
|
/* Straight conversion to ASCII, to avoid costly conversion of
|
|
|
|
decimal unicode digits (which cannot appear here) */
|
|
|
|
n = idx - start;
|
|
|
|
numstr = PyBytes_FromStringAndSize(NULL, n);
|
|
|
|
if (numstr == NULL)
|
|
|
|
return NULL;
|
|
|
|
buf = PyBytes_AS_STRING(numstr);
|
|
|
|
for (i = 0; i < n; i++) {
|
2011-09-28 13:41:54 +08:00
|
|
|
buf[i] = (char) PyUnicode_READ(kind, str, i + start);
|
2011-04-26 01:16:06 +08:00
|
|
|
}
|
|
|
|
if (is_float)
|
|
|
|
rval = PyFloat_FromString(numstr);
|
|
|
|
else
|
|
|
|
rval = PyLong_FromString(buf, NULL, 10);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
Py_DECREF(numstr);
|
|
|
|
*next_idx_ptr = idx;
|
|
|
|
return rval;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
|
|
|
|
{
|
|
|
|
/* Read one JSON term (of any kind) from PyUnicode pystr.
|
|
|
|
idx is the index of the first character of the term
|
|
|
|
*next_idx_ptr is a return-by-reference index to the first character after
|
|
|
|
the number.
|
|
|
|
|
|
|
|
Returns a new PyObject representation of the term.
|
|
|
|
*/
|
2011-05-07 22:58:09 +08:00
|
|
|
PyObject *res;
|
2011-09-28 13:41:54 +08:00
|
|
|
void *str;
|
|
|
|
int kind;
|
|
|
|
Py_ssize_t length;
|
|
|
|
|
|
|
|
if (PyUnicode_READY(pystr) == -1)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
str = PyUnicode_DATA(pystr);
|
|
|
|
kind = PyUnicode_KIND(pystr);
|
|
|
|
length = PyUnicode_GET_LENGTH(pystr);
|
|
|
|
|
2014-04-14 23:45:21 +08:00
|
|
|
if (idx < 0) {
|
2014-04-14 23:46:51 +08:00
|
|
|
PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
|
2014-04-14 23:45:21 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
if (idx >= length) {
|
2013-01-03 14:44:15 +08:00
|
|
|
raise_stop_iteration(idx);
|
2009-05-02 20:36:44 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
2011-09-28 13:41:54 +08:00
|
|
|
|
|
|
|
switch (PyUnicode_READ(kind, str, idx)) {
|
2009-05-02 20:36:44 +08:00
|
|
|
case '"':
|
|
|
|
/* string */
|
2017-05-28 20:31:49 +08:00
|
|
|
return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
|
2009-05-02 20:36:44 +08:00
|
|
|
case '{':
|
|
|
|
/* object */
|
2011-05-07 22:58:09 +08:00
|
|
|
if (Py_EnterRecursiveCall(" while decoding a JSON object "
|
|
|
|
"from a unicode string"))
|
|
|
|
return NULL;
|
|
|
|
res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
|
|
|
|
Py_LeaveRecursiveCall();
|
|
|
|
return res;
|
2009-05-02 20:36:44 +08:00
|
|
|
case '[':
|
|
|
|
/* array */
|
2011-05-07 22:58:09 +08:00
|
|
|
if (Py_EnterRecursiveCall(" while decoding a JSON array "
|
|
|
|
"from a unicode string"))
|
|
|
|
return NULL;
|
|
|
|
res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
|
|
|
|
Py_LeaveRecursiveCall();
|
|
|
|
return res;
|
2009-05-02 20:36:44 +08:00
|
|
|
case 'n':
|
|
|
|
/* null */
|
2011-09-28 13:41:54 +08:00
|
|
|
if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
|
2009-05-02 20:36:44 +08:00
|
|
|
*next_idx_ptr = idx + 4;
|
2017-01-23 16:23:58 +08:00
|
|
|
Py_RETURN_NONE;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
/* true */
|
2011-09-28 13:41:54 +08:00
|
|
|
if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
|
2009-05-02 20:36:44 +08:00
|
|
|
*next_idx_ptr = idx + 4;
|
2017-01-23 16:23:58 +08:00
|
|
|
Py_RETURN_TRUE;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
/* false */
|
2011-10-12 03:56:19 +08:00
|
|
|
if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
|
|
|
|
PyUnicode_READ(kind, str, idx + 2) == 'l' &&
|
|
|
|
PyUnicode_READ(kind, str, idx + 3) == 's' &&
|
2011-09-28 13:41:54 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 4) == 'e') {
|
2009-05-02 20:36:44 +08:00
|
|
|
*next_idx_ptr = idx + 5;
|
2017-01-23 16:23:58 +08:00
|
|
|
Py_RETURN_FALSE;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'N':
|
|
|
|
/* NaN */
|
2011-10-12 03:56:19 +08:00
|
|
|
if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
|
2011-09-28 13:41:54 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 2) == 'N') {
|
2009-05-02 20:36:44 +08:00
|
|
|
return _parse_constant(s, "NaN", idx, next_idx_ptr);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'I':
|
|
|
|
/* Infinity */
|
2011-10-12 03:56:19 +08:00
|
|
|
if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
|
|
|
|
PyUnicode_READ(kind, str, idx + 2) == 'f' &&
|
|
|
|
PyUnicode_READ(kind, str, idx + 3) == 'i' &&
|
2011-09-28 13:41:54 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 4) == 'n' &&
|
2011-10-12 03:56:19 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 5) == 'i' &&
|
|
|
|
PyUnicode_READ(kind, str, idx + 6) == 't' &&
|
2011-09-28 13:41:54 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 7) == 'y') {
|
2009-05-02 20:36:44 +08:00
|
|
|
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '-':
|
|
|
|
/* -Infinity */
|
2011-10-12 03:56:19 +08:00
|
|
|
if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
|
2011-09-28 13:41:54 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 2) == 'n' &&
|
|
|
|
PyUnicode_READ(kind, str, idx + 3) == 'f' &&
|
2011-10-12 03:56:19 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 4) == 'i' &&
|
2011-09-28 13:41:54 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 5) == 'n' &&
|
2011-10-12 03:56:19 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 6) == 'i' &&
|
|
|
|
PyUnicode_READ(kind, str, idx + 7) == 't' &&
|
2011-09-28 13:41:54 +08:00
|
|
|
PyUnicode_READ(kind, str, idx + 8) == 'y') {
|
2009-05-02 20:36:44 +08:00
|
|
|
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Didn't find a string, object, array, or named constant. Look for a number. */
|
|
|
|
return _match_number_unicode(s, pystr, idx, next_idx_ptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
{
    /* tp_call for the scanner: scan_once(string, idx).
       Runs scan_once_unicode() on `string` starting at `idx`, empties the
       scanner's memo dict, and passes the decoded value and end index to
       _build_rval_index_tuple(). */
    static char *kwlist[] = {"string", "idx", NULL};
    PyScannerObject *scanner;
    PyObject *text;
    PyObject *value;
    Py_ssize_t idx;
    Py_ssize_t end = -1;

    assert(PyScanner_Check(self));
    scanner = (PyScannerObject *)self;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &text, &idx)) {
        return NULL;
    }

    if (!PyUnicode_Check(text)) {
        PyErr_Format(PyExc_TypeError,
                     "first argument must be a string, not %.80s",
                     Py_TYPE(text)->tp_name);
        return NULL;
    }

    value = scan_once_unicode(scanner, text, idx, &end);

    /* the memo is emptied after every scan, successful or not */
    PyDict_Clear(scanner->memo);

    if (value == NULL) {
        return NULL;
    }
    return _build_rval_index_tuple(value, end);
}
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|
|
|
{
|
|
|
|
PyScannerObject *s;
|
|
|
|
PyObject *ctx;
|
2017-05-28 20:31:49 +08:00
|
|
|
PyObject *strict;
|
2009-05-02 20:36:44 +08:00
|
|
|
static char *kwlist[] = {"context", NULL};
|
|
|
|
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
|
2017-05-05 15:08:49 +08:00
|
|
|
return NULL;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
2017-05-05 15:08:49 +08:00
|
|
|
s = (PyScannerObject *)type->tp_alloc(type, 0);
|
|
|
|
if (s == NULL) {
|
|
|
|
return NULL;
|
2010-09-05 04:16:53 +08:00
|
|
|
}
|
|
|
|
|
2017-05-05 15:08:49 +08:00
|
|
|
s->memo = PyDict_New();
|
|
|
|
if (s->memo == NULL)
|
|
|
|
goto bail;
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
/* All of these will fail "gracefully" so we don't need to verify them */
|
2017-05-28 20:31:49 +08:00
|
|
|
strict = PyObject_GetAttrString(ctx, "strict");
|
|
|
|
if (strict == NULL)
|
|
|
|
goto bail;
|
|
|
|
s->strict = PyObject_IsTrue(strict);
|
|
|
|
Py_DECREF(strict);
|
|
|
|
if (s->strict < 0)
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
|
|
|
|
if (s->object_hook == NULL)
|
|
|
|
goto bail;
|
|
|
|
s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
|
|
|
|
if (s->object_pairs_hook == NULL)
|
|
|
|
goto bail;
|
|
|
|
s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
|
|
|
|
if (s->parse_float == NULL)
|
|
|
|
goto bail;
|
|
|
|
s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
|
|
|
|
if (s->parse_int == NULL)
|
|
|
|
goto bail;
|
|
|
|
s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
|
|
|
|
if (s->parse_constant == NULL)
|
|
|
|
goto bail;
|
|
|
|
|
2017-05-05 15:08:49 +08:00
|
|
|
return (PyObject *)s;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
bail:
|
2017-05-05 15:08:49 +08:00
|
|
|
Py_DECREF(s);
|
|
|
|
return NULL;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
PyDoc_STRVAR(scanner_doc, "JSON scanner object");
|
|
|
|
|
|
|
|
static
|
|
|
|
PyTypeObject PyScannerType = {
|
|
|
|
PyVarObject_HEAD_INIT(NULL, 0)
|
|
|
|
"_json.Scanner", /* tp_name */
|
|
|
|
sizeof(PyScannerObject), /* tp_basicsize */
|
|
|
|
0, /* tp_itemsize */
|
|
|
|
scanner_dealloc, /* tp_dealloc */
|
2019-05-31 10:13:39 +08:00
|
|
|
0, /* tp_vectorcall_offset */
|
2009-05-02 20:36:44 +08:00
|
|
|
0, /* tp_getattr */
|
|
|
|
0, /* tp_setattr */
|
2019-05-31 10:13:39 +08:00
|
|
|
0, /* tp_as_async */
|
2009-05-02 20:36:44 +08:00
|
|
|
0, /* tp_repr */
|
|
|
|
0, /* tp_as_number */
|
|
|
|
0, /* tp_as_sequence */
|
|
|
|
0, /* tp_as_mapping */
|
|
|
|
0, /* tp_hash */
|
|
|
|
scanner_call, /* tp_call */
|
|
|
|
0, /* tp_str */
|
|
|
|
0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
|
|
|
|
0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
|
|
|
|
0, /* tp_as_buffer */
|
|
|
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
|
|
|
|
scanner_doc, /* tp_doc */
|
|
|
|
scanner_traverse, /* tp_traverse */
|
|
|
|
scanner_clear, /* tp_clear */
|
|
|
|
0, /* tp_richcompare */
|
|
|
|
0, /* tp_weaklistoffset */
|
|
|
|
0, /* tp_iter */
|
|
|
|
0, /* tp_iternext */
|
|
|
|
0, /* tp_methods */
|
|
|
|
scanner_members, /* tp_members */
|
|
|
|
0, /* tp_getset */
|
|
|
|
0, /* tp_base */
|
|
|
|
0, /* tp_dict */
|
|
|
|
0, /* tp_descr_get */
|
|
|
|
0, /* tp_descr_set */
|
|
|
|
0, /* tp_dictoffset */
|
2017-05-05 15:08:49 +08:00
|
|
|
0, /* tp_init */
|
2009-05-02 20:36:44 +08:00
|
|
|
0,/* PyType_GenericAlloc, */ /* tp_alloc */
|
|
|
|
scanner_new, /* tp_new */
|
|
|
|
0,/* PyObject_GC_Del, */ /* tp_free */
|
|
|
|
};
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|
|
|
{
|
|
|
|
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
|
|
|
|
|
|
|
|
PyEncoderObject *s;
|
2009-12-08 23:57:31 +08:00
|
|
|
PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
|
2017-05-28 20:31:49 +08:00
|
|
|
PyObject *item_separator;
|
|
|
|
int sort_keys, skipkeys, allow_nan;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
2017-05-28 20:31:49 +08:00
|
|
|
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
|
2015-07-26 14:01:22 +08:00
|
|
|
&markers, &defaultfn, &encoder, &indent,
|
|
|
|
&key_separator, &item_separator,
|
2009-12-08 23:57:31 +08:00
|
|
|
&sort_keys, &skipkeys, &allow_nan))
|
2017-05-05 15:08:49 +08:00
|
|
|
return NULL;
|
2009-05-02 20:36:44 +08:00
|
|
|
|
2015-07-26 14:01:22 +08:00
|
|
|
if (markers != Py_None && !PyDict_Check(markers)) {
|
|
|
|
PyErr_Format(PyExc_TypeError,
|
|
|
|
"make_encoder() argument 1 must be dict or None, "
|
|
|
|
"not %.200s", Py_TYPE(markers)->tp_name);
|
2017-05-05 15:08:49 +08:00
|
|
|
return NULL;
|
2015-07-26 14:01:22 +08:00
|
|
|
}
|
|
|
|
|
2017-05-05 15:08:49 +08:00
|
|
|
s = (PyEncoderObject *)type->tp_alloc(type, 0);
|
|
|
|
if (s == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
2009-12-08 23:57:31 +08:00
|
|
|
s->markers = markers;
|
|
|
|
s->defaultfn = defaultfn;
|
|
|
|
s->encoder = encoder;
|
|
|
|
s->indent = indent;
|
|
|
|
s->key_separator = key_separator;
|
|
|
|
s->item_separator = item_separator;
|
|
|
|
s->sort_keys = sort_keys;
|
|
|
|
s->skipkeys = skipkeys;
|
2017-05-28 20:31:49 +08:00
|
|
|
s->allow_nan = allow_nan;
|
2015-01-11 23:41:01 +08:00
|
|
|
s->fast_encode = NULL;
|
|
|
|
if (PyCFunction_Check(s->encoder)) {
|
|
|
|
PyCFunction f = PyCFunction_GetFunction(s->encoder);
|
|
|
|
if (f == (PyCFunction)py_encode_basestring_ascii ||
|
|
|
|
f == (PyCFunction)py_encode_basestring) {
|
|
|
|
s->fast_encode = f;
|
|
|
|
}
|
|
|
|
}
|
2009-12-08 23:57:31 +08:00
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_INCREF(s->markers);
|
|
|
|
Py_INCREF(s->defaultfn);
|
|
|
|
Py_INCREF(s->encoder);
|
|
|
|
Py_INCREF(s->indent);
|
|
|
|
Py_INCREF(s->key_separator);
|
|
|
|
Py_INCREF(s->item_separator);
|
2017-05-05 15:08:49 +08:00
|
|
|
return (PyObject *)s;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
{
    /* tp_call for the encoder: _iterencode(obj, _current_indent_level).
       Encodes obj through encoder_listencode_obj() into an accumulator and
       returns the accumulated pieces as a list. */
    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
    PyEncoderObject *enc;
    PyObject *obj;
    Py_ssize_t indent_level;
    _PyAccu pieces;

    assert(PyEncoder_Check(self));
    enc = (PyEncoderObject *)self;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
                                     &obj, &indent_level)) {
        return NULL;
    }

    if (_PyAccu_Init(&pieces) != 0) {
        return NULL;
    }

    if (encoder_listencode_obj(enc, &pieces, obj, indent_level) != 0) {
        _PyAccu_Destroy(&pieces);
        return NULL;
    }

    return _PyAccu_FinishAsList(&pieces);
}
|
|
|
|
|
|
|
|
static PyObject *
_encoded_const(PyObject *obj)
{
    /* Map None / True / False to the JSON text "null" / "true" / "false".
       Each string is created once, kept in a function-local static, and
       returned as a new reference.  Any other object raises ValueError.
       Returns NULL if the string could not be created. */
    static PyObject *s_null = NULL;
    static PyObject *s_true = NULL;
    static PyObject *s_false = NULL;
    PyObject **cache;
    const char *text;

    if (obj == Py_None) {
        cache = &s_null;
        text = "null";
    }
    else if (obj == Py_True) {
        cache = &s_true;
        text = "true";
    }
    else if (obj == Py_False) {
        cache = &s_false;
        text = "false";
    }
    else {
        PyErr_SetString(PyExc_ValueError, "not a const");
        return NULL;
    }

    if (*cache == NULL) {
        *cache = PyUnicode_InternFromString(text);
    }
    /* XINCREF: the cache entry is still NULL if interning just failed */
    Py_XINCREF(*cache);
    return *cache;
}
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
encoder_encode_float(PyEncoderObject *s, PyObject *obj)
|
|
|
|
{
|
2016-04-10 19:41:19 +08:00
|
|
|
/* Return the JSON representation of a PyFloat. */
|
2009-05-02 20:36:44 +08:00
|
|
|
double i = PyFloat_AS_DOUBLE(obj);
|
|
|
|
if (!Py_IS_FINITE(i)) {
|
|
|
|
if (!s->allow_nan) {
|
2013-08-11 04:01:45 +08:00
|
|
|
PyErr_SetString(
|
|
|
|
PyExc_ValueError,
|
|
|
|
"Out of range float values are not JSON compliant"
|
|
|
|
);
|
2009-05-02 20:36:44 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
if (i > 0) {
|
|
|
|
return PyUnicode_FromString("Infinity");
|
|
|
|
}
|
|
|
|
else if (i < 0) {
|
|
|
|
return PyUnicode_FromString("-Infinity");
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return PyUnicode_FromString("NaN");
|
|
|
|
}
|
|
|
|
}
|
2016-04-10 19:41:19 +08:00
|
|
|
return PyFloat_Type.tp_repr(obj);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
|
|
|
|
{
|
|
|
|
/* Return the JSON representation of a string */
|
2017-09-24 17:07:12 +08:00
|
|
|
PyObject *encoded;
|
|
|
|
|
|
|
|
if (s->fast_encode) {
|
2015-01-11 23:41:01 +08:00
|
|
|
return s->fast_encode(NULL, obj);
|
2017-09-24 17:07:12 +08:00
|
|
|
}
|
2019-07-04 18:31:34 +08:00
|
|
|
encoded = _PyObject_CallOneArg(s->encoder, obj);
|
2017-09-24 17:07:12 +08:00
|
|
|
if (encoded != NULL && !PyUnicode_Check(encoded)) {
|
|
|
|
PyErr_Format(PyExc_TypeError,
|
|
|
|
"encoder() must return a string, not %.80s",
|
|
|
|
Py_TYPE(encoded)->tp_name);
|
|
|
|
Py_DECREF(encoded);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return encoded;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2011-10-07 01:09:51 +08:00
|
|
|
_steal_accumulate(_PyAccu *acc, PyObject *stolen)
|
2009-05-02 20:36:44 +08:00
|
|
|
{
|
|
|
|
/* Append stolen and then decrement its reference count */
|
2011-10-07 01:09:51 +08:00
|
|
|
int rval = _PyAccu_Accumulate(acc, stolen);
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(stolen);
|
|
|
|
return rval;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2011-10-07 01:09:51 +08:00
|
|
|
encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
|
2011-08-20 00:03:14 +08:00
|
|
|
PyObject *obj, Py_ssize_t indent_level)
|
2009-05-02 20:36:44 +08:00
|
|
|
{
|
2011-08-20 00:03:14 +08:00
|
|
|
/* Encode Python object obj to a JSON term */
|
2009-05-02 20:36:44 +08:00
|
|
|
PyObject *newobj;
|
|
|
|
int rv;
|
|
|
|
|
|
|
|
if (obj == Py_None || obj == Py_True || obj == Py_False) {
|
|
|
|
PyObject *cstr = _encoded_const(obj);
|
|
|
|
if (cstr == NULL)
|
|
|
|
return -1;
|
2011-08-20 00:03:14 +08:00
|
|
|
return _steal_accumulate(acc, cstr);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
else if (PyUnicode_Check(obj))
|
|
|
|
{
|
|
|
|
PyObject *encoded = encoder_encode_string(s, obj);
|
|
|
|
if (encoded == NULL)
|
|
|
|
return -1;
|
2011-08-20 00:03:14 +08:00
|
|
|
return _steal_accumulate(acc, encoded);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
else if (PyLong_Check(obj)) {
|
2019-05-07 03:29:40 +08:00
|
|
|
PyObject *encoded = PyLong_Type.tp_repr(obj);
|
2009-05-02 20:36:44 +08:00
|
|
|
if (encoded == NULL)
|
|
|
|
return -1;
|
2011-08-20 00:03:14 +08:00
|
|
|
return _steal_accumulate(acc, encoded);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
else if (PyFloat_Check(obj)) {
|
|
|
|
PyObject *encoded = encoder_encode_float(s, obj);
|
|
|
|
if (encoded == NULL)
|
|
|
|
return -1;
|
2011-08-20 00:03:14 +08:00
|
|
|
return _steal_accumulate(acc, encoded);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
|
2011-05-11 06:02:56 +08:00
|
|
|
if (Py_EnterRecursiveCall(" while encoding a JSON object"))
|
|
|
|
return -1;
|
2011-08-20 00:03:14 +08:00
|
|
|
rv = encoder_listencode_list(s, acc, obj, indent_level);
|
2011-05-11 06:02:56 +08:00
|
|
|
Py_LeaveRecursiveCall();
|
|
|
|
return rv;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
else if (PyDict_Check(obj)) {
|
2011-05-11 06:02:56 +08:00
|
|
|
if (Py_EnterRecursiveCall(" while encoding a JSON object"))
|
|
|
|
return -1;
|
2011-08-20 00:03:14 +08:00
|
|
|
rv = encoder_listencode_dict(s, acc, obj, indent_level);
|
2011-05-11 06:02:56 +08:00
|
|
|
Py_LeaveRecursiveCall();
|
|
|
|
return rv;
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
PyObject *ident = NULL;
|
|
|
|
if (s->markers != Py_None) {
|
|
|
|
int has_key;
|
|
|
|
ident = PyLong_FromVoidPtr(obj);
|
|
|
|
if (ident == NULL)
|
|
|
|
return -1;
|
|
|
|
has_key = PyDict_Contains(s->markers, ident);
|
|
|
|
if (has_key) {
|
|
|
|
if (has_key != -1)
|
|
|
|
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
|
|
|
|
Py_DECREF(ident);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (PyDict_SetItem(s->markers, ident, obj)) {
|
|
|
|
Py_DECREF(ident);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
2019-07-04 18:31:34 +08:00
|
|
|
newobj = _PyObject_CallOneArg(s->defaultfn, obj);
|
2009-05-02 20:36:44 +08:00
|
|
|
if (newobj == NULL) {
|
|
|
|
Py_XDECREF(ident);
|
|
|
|
return -1;
|
|
|
|
}
|
2011-05-11 06:02:56 +08:00
|
|
|
|
2017-01-03 17:17:44 +08:00
|
|
|
if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
|
|
|
|
Py_DECREF(newobj);
|
|
|
|
Py_XDECREF(ident);
|
2011-05-11 06:02:56 +08:00
|
|
|
return -1;
|
2017-01-03 17:17:44 +08:00
|
|
|
}
|
2011-08-20 00:03:14 +08:00
|
|
|
rv = encoder_listencode_obj(s, acc, newobj, indent_level);
|
2011-05-11 06:02:56 +08:00
|
|
|
Py_LeaveRecursiveCall();
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(newobj);
|
|
|
|
if (rv) {
|
|
|
|
Py_XDECREF(ident);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (ident != NULL) {
|
|
|
|
if (PyDict_DelItem(s->markers, ident)) {
|
|
|
|
Py_XDECREF(ident);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
Py_XDECREF(ident);
|
|
|
|
}
|
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2011-10-07 01:09:51 +08:00
|
|
|
encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
|
2011-08-20 00:03:14 +08:00
|
|
|
PyObject *dct, Py_ssize_t indent_level)
|
2009-05-02 20:36:44 +08:00
|
|
|
{
|
2011-08-20 00:03:14 +08:00
|
|
|
/* Encode Python dict dct a JSON term */
|
2009-05-02 20:36:44 +08:00
|
|
|
static PyObject *open_dict = NULL;
|
|
|
|
static PyObject *close_dict = NULL;
|
|
|
|
static PyObject *empty_dict = NULL;
|
|
|
|
PyObject *kstr = NULL;
|
|
|
|
PyObject *ident = NULL;
|
2009-05-27 14:50:31 +08:00
|
|
|
PyObject *it = NULL;
|
2009-05-27 17:58:34 +08:00
|
|
|
PyObject *items;
|
|
|
|
PyObject *item = NULL;
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_ssize_t idx;
|
|
|
|
|
|
|
|
if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
|
|
|
|
open_dict = PyUnicode_InternFromString("{");
|
|
|
|
close_dict = PyUnicode_InternFromString("}");
|
|
|
|
empty_dict = PyUnicode_InternFromString("{}");
|
|
|
|
if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
|
|
|
|
return -1;
|
|
|
|
}
|
2017-01-13 14:38:15 +08:00
|
|
|
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
|
2011-10-07 01:09:51 +08:00
|
|
|
return _PyAccu_Accumulate(acc, empty_dict);
|
2009-05-02 20:36:44 +08:00
|
|
|
|
|
|
|
if (s->markers != Py_None) {
|
|
|
|
int has_key;
|
|
|
|
ident = PyLong_FromVoidPtr(dct);
|
|
|
|
if (ident == NULL)
|
|
|
|
goto bail;
|
|
|
|
has_key = PyDict_Contains(s->markers, ident);
|
|
|
|
if (has_key) {
|
|
|
|
if (has_key != -1)
|
|
|
|
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
if (PyDict_SetItem(s->markers, ident, dct)) {
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, open_dict))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
|
|
|
|
if (s->indent != Py_None) {
|
|
|
|
/* TODO: DOES NOT RUN */
|
|
|
|
indent_level += 1;
|
|
|
|
/*
|
|
|
|
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
|
|
|
|
separator = _item_separator + newline_indent
|
|
|
|
buf += newline_indent
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2015-05-03 10:28:04 +08:00
|
|
|
items = PyMapping_Items(dct);
|
2010-11-05 00:51:32 +08:00
|
|
|
if (items == NULL)
|
2009-05-27 19:19:02 +08:00
|
|
|
goto bail;
|
2017-07-16 12:29:16 +08:00
|
|
|
if (s->sort_keys && PyList_Sort(items) < 0) {
|
|
|
|
Py_DECREF(items);
|
2015-05-03 10:28:04 +08:00
|
|
|
goto bail;
|
2017-07-16 12:29:16 +08:00
|
|
|
}
|
2009-05-27 17:58:34 +08:00
|
|
|
it = PyObject_GetIter(items);
|
2010-11-05 00:51:32 +08:00
|
|
|
Py_DECREF(items);
|
|
|
|
if (it == NULL)
|
2009-05-27 14:50:31 +08:00
|
|
|
goto bail;
|
2009-05-02 20:36:44 +08:00
|
|
|
idx = 0;
|
2009-05-27 17:58:34 +08:00
|
|
|
while ((item = PyIter_Next(it)) != NULL) {
|
|
|
|
PyObject *encoded, *key, *value;
|
2017-03-21 14:53:25 +08:00
|
|
|
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
|
2009-05-27 17:58:34 +08:00
|
|
|
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
key = PyTuple_GET_ITEM(item, 0);
|
2009-05-02 20:36:44 +08:00
|
|
|
if (PyUnicode_Check(key)) {
|
|
|
|
Py_INCREF(key);
|
|
|
|
kstr = key;
|
|
|
|
}
|
|
|
|
else if (PyFloat_Check(key)) {
|
|
|
|
kstr = encoder_encode_float(s, key);
|
|
|
|
if (kstr == NULL)
|
|
|
|
goto bail;
|
|
|
|
}
|
2009-05-27 17:58:34 +08:00
|
|
|
else if (key == Py_True || key == Py_False || key == Py_None) {
|
2010-05-09 23:52:27 +08:00
|
|
|
/* This must come before the PyLong_Check because
|
|
|
|
True and False are also 1 and 0.*/
|
2009-05-27 17:58:34 +08:00
|
|
|
kstr = _encoded_const(key);
|
2009-05-02 20:36:44 +08:00
|
|
|
if (kstr == NULL)
|
|
|
|
goto bail;
|
|
|
|
}
|
2009-05-27 17:58:34 +08:00
|
|
|
else if (PyLong_Check(key)) {
|
2019-05-07 03:29:40 +08:00
|
|
|
kstr = PyLong_Type.tp_repr(key);
|
2013-08-11 04:01:45 +08:00
|
|
|
if (kstr == NULL) {
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
2013-08-11 04:01:45 +08:00
|
|
|
}
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
2017-05-28 20:31:49 +08:00
|
|
|
else if (s->skipkeys) {
|
2009-05-27 17:58:34 +08:00
|
|
|
Py_DECREF(item);
|
2009-05-02 20:36:44 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else {
|
2017-11-25 23:38:20 +08:00
|
|
|
PyErr_Format(PyExc_TypeError,
|
|
|
|
"keys must be str, int, float, bool or None, "
|
|
|
|
"not %.100s", key->ob_type->tp_name);
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (idx) {
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, s->item_separator))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
|
|
|
|
encoded = encoder_encode_string(s, kstr);
|
|
|
|
Py_CLEAR(kstr);
|
|
|
|
if (encoded == NULL)
|
|
|
|
goto bail;
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, encoded)) {
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(encoded);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
Py_DECREF(encoded);
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, s->key_separator))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
2009-05-27 14:50:31 +08:00
|
|
|
|
2009-05-27 17:58:34 +08:00
|
|
|
value = PyTuple_GET_ITEM(item, 1);
|
2011-08-20 00:03:14 +08:00
|
|
|
if (encoder_listencode_obj(s, acc, value, indent_level))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
idx += 1;
|
2009-05-27 17:58:34 +08:00
|
|
|
Py_DECREF(item);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
2009-05-27 14:50:31 +08:00
|
|
|
if (PyErr_Occurred())
|
|
|
|
goto bail;
|
|
|
|
Py_CLEAR(it);
|
|
|
|
|
2009-05-02 20:36:44 +08:00
|
|
|
if (ident != NULL) {
|
|
|
|
if (PyDict_DelItem(s->markers, ident))
|
|
|
|
goto bail;
|
|
|
|
Py_CLEAR(ident);
|
|
|
|
}
|
2011-02-23 04:15:44 +08:00
|
|
|
/* TODO DOES NOT RUN; dead code
|
2009-05-02 20:36:44 +08:00
|
|
|
if (s->indent != Py_None) {
|
|
|
|
indent_level -= 1;
|
2011-02-23 04:15:44 +08:00
|
|
|
|
|
|
|
yield '\n' + (' ' * (_indent * _current_indent_level))
|
|
|
|
}*/
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, close_dict))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
bail:
|
2009-05-27 14:50:31 +08:00
|
|
|
Py_XDECREF(it);
|
2009-05-27 17:58:34 +08:00
|
|
|
Py_XDECREF(item);
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_XDECREF(kstr);
|
|
|
|
Py_XDECREF(ident);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
2011-10-07 01:09:51 +08:00
|
|
|
encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
|
2011-08-20 00:03:14 +08:00
|
|
|
PyObject *seq, Py_ssize_t indent_level)
|
2009-05-02 20:36:44 +08:00
|
|
|
{
|
2011-08-20 00:03:14 +08:00
|
|
|
/* Encode Python list seq to a JSON term */
|
2009-05-02 20:36:44 +08:00
|
|
|
static PyObject *open_array = NULL;
|
|
|
|
static PyObject *close_array = NULL;
|
|
|
|
static PyObject *empty_array = NULL;
|
|
|
|
PyObject *ident = NULL;
|
|
|
|
PyObject *s_fast = NULL;
|
|
|
|
Py_ssize_t i;
|
|
|
|
|
|
|
|
if (open_array == NULL || close_array == NULL || empty_array == NULL) {
|
|
|
|
open_array = PyUnicode_InternFromString("[");
|
|
|
|
close_array = PyUnicode_InternFromString("]");
|
|
|
|
empty_array = PyUnicode_InternFromString("[]");
|
|
|
|
if (open_array == NULL || close_array == NULL || empty_array == NULL)
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
ident = NULL;
|
|
|
|
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
|
|
|
|
if (s_fast == NULL)
|
|
|
|
return -1;
|
2012-11-02 02:52:06 +08:00
|
|
|
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
|
2009-05-02 20:36:44 +08:00
|
|
|
Py_DECREF(s_fast);
|
2011-10-07 01:09:51 +08:00
|
|
|
return _PyAccu_Accumulate(acc, empty_array);
|
2009-05-02 20:36:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (s->markers != Py_None) {
|
|
|
|
int has_key;
|
|
|
|
ident = PyLong_FromVoidPtr(seq);
|
|
|
|
if (ident == NULL)
|
|
|
|
goto bail;
|
|
|
|
has_key = PyDict_Contains(s->markers, ident);
|
|
|
|
if (has_key) {
|
|
|
|
if (has_key != -1)
|
|
|
|
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
if (PyDict_SetItem(s->markers, ident, seq)) {
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, open_array))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
if (s->indent != Py_None) {
|
|
|
|
/* TODO: DOES NOT RUN */
|
|
|
|
indent_level += 1;
|
|
|
|
/*
|
|
|
|
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
|
|
|
|
separator = _item_separator + newline_indent
|
|
|
|
buf += newline_indent
|
|
|
|
*/
|
|
|
|
}
|
2012-11-02 02:52:06 +08:00
|
|
|
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
|
|
|
|
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
|
2009-05-02 20:36:44 +08:00
|
|
|
if (i) {
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, s->item_separator))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
2011-08-20 00:03:14 +08:00
|
|
|
if (encoder_listencode_obj(s, acc, obj, indent_level))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
if (ident != NULL) {
|
|
|
|
if (PyDict_DelItem(s->markers, ident))
|
|
|
|
goto bail;
|
|
|
|
Py_CLEAR(ident);
|
|
|
|
}
|
2011-02-23 04:15:44 +08:00
|
|
|
|
|
|
|
/* TODO: DOES NOT RUN
|
2009-05-02 20:36:44 +08:00
|
|
|
if (s->indent != Py_None) {
|
|
|
|
indent_level -= 1;
|
2011-02-23 04:15:44 +08:00
|
|
|
|
|
|
|
yield '\n' + (' ' * (_indent * _current_indent_level))
|
|
|
|
}*/
|
2011-10-07 01:09:51 +08:00
|
|
|
if (_PyAccu_Accumulate(acc, close_array))
|
2009-05-02 20:36:44 +08:00
|
|
|
goto bail;
|
|
|
|
Py_DECREF(s_fast);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
bail:
|
|
|
|
Py_XDECREF(ident);
|
|
|
|
Py_DECREF(s_fast);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
encoder_dealloc(PyObject *self)
|
|
|
|
{
|
2017-08-24 13:55:17 +08:00
|
|
|
/* bpo-31095: UnTrack is needed before calling any callbacks */
|
|
|
|
PyObject_GC_UnTrack(self);
|
2009-05-02 20:36:44 +08:00
|
|
|
encoder_clear(self);
|
|
|
|
Py_TYPE(self)->tp_free(self);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
encoder_traverse(PyObject *self, visitproc visit, void *arg)
|
|
|
|
{
|
|
|
|
PyEncoderObject *s;
|
|
|
|
assert(PyEncoder_Check(self));
|
|
|
|
s = (PyEncoderObject *)self;
|
|
|
|
Py_VISIT(s->markers);
|
|
|
|
Py_VISIT(s->defaultfn);
|
|
|
|
Py_VISIT(s->encoder);
|
|
|
|
Py_VISIT(s->indent);
|
|
|
|
Py_VISIT(s->key_separator);
|
|
|
|
Py_VISIT(s->item_separator);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
encoder_clear(PyObject *self)
|
|
|
|
{
|
|
|
|
/* Deallocate Encoder */
|
|
|
|
PyEncoderObject *s;
|
|
|
|
assert(PyEncoder_Check(self));
|
|
|
|
s = (PyEncoderObject *)self;
|
|
|
|
Py_CLEAR(s->markers);
|
|
|
|
Py_CLEAR(s->defaultfn);
|
|
|
|
Py_CLEAR(s->encoder);
|
|
|
|
Py_CLEAR(s->indent);
|
|
|
|
Py_CLEAR(s->key_separator);
|
|
|
|
Py_CLEAR(s->item_separator);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
|
|
|
|
|
|
|
|
static
|
|
|
|
PyTypeObject PyEncoderType = {
|
|
|
|
PyVarObject_HEAD_INIT(NULL, 0)
|
|
|
|
"_json.Encoder", /* tp_name */
|
|
|
|
sizeof(PyEncoderObject), /* tp_basicsize */
|
|
|
|
0, /* tp_itemsize */
|
|
|
|
encoder_dealloc, /* tp_dealloc */
|
2019-05-31 10:13:39 +08:00
|
|
|
0, /* tp_vectorcall_offset */
|
2009-05-02 20:36:44 +08:00
|
|
|
0, /* tp_getattr */
|
|
|
|
0, /* tp_setattr */
|
2019-05-31 10:13:39 +08:00
|
|
|
0, /* tp_as_async */
|
2009-05-02 20:36:44 +08:00
|
|
|
0, /* tp_repr */
|
|
|
|
0, /* tp_as_number */
|
|
|
|
0, /* tp_as_sequence */
|
|
|
|
0, /* tp_as_mapping */
|
|
|
|
0, /* tp_hash */
|
|
|
|
encoder_call, /* tp_call */
|
|
|
|
0, /* tp_str */
|
|
|
|
0, /* tp_getattro */
|
|
|
|
0, /* tp_setattro */
|
|
|
|
0, /* tp_as_buffer */
|
|
|
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
|
|
|
|
encoder_doc, /* tp_doc */
|
|
|
|
encoder_traverse, /* tp_traverse */
|
|
|
|
encoder_clear, /* tp_clear */
|
|
|
|
0, /* tp_richcompare */
|
|
|
|
0, /* tp_weaklistoffset */
|
|
|
|
0, /* tp_iter */
|
|
|
|
0, /* tp_iternext */
|
|
|
|
0, /* tp_methods */
|
|
|
|
encoder_members, /* tp_members */
|
|
|
|
0, /* tp_getset */
|
|
|
|
0, /* tp_base */
|
|
|
|
0, /* tp_dict */
|
|
|
|
0, /* tp_descr_get */
|
|
|
|
0, /* tp_descr_set */
|
|
|
|
0, /* tp_dictoffset */
|
2017-05-05 15:08:49 +08:00
|
|
|
0, /* tp_init */
|
2009-05-02 20:36:44 +08:00
|
|
|
0, /* tp_alloc */
|
|
|
|
encoder_new, /* tp_new */
|
|
|
|
0, /* tp_free */
|
|
|
|
};
|
|
|
|
|
|
|
|
static PyMethodDef speedups_methods[] = {
|
|
|
|
{"encode_basestring_ascii",
|
|
|
|
(PyCFunction)py_encode_basestring_ascii,
|
|
|
|
METH_O,
|
|
|
|
pydoc_encode_basestring_ascii},
|
2015-01-11 23:41:01 +08:00
|
|
|
{"encode_basestring",
|
|
|
|
(PyCFunction)py_encode_basestring,
|
|
|
|
METH_O,
|
|
|
|
pydoc_encode_basestring},
|
2009-05-02 20:36:44 +08:00
|
|
|
{"scanstring",
|
|
|
|
(PyCFunction)py_scanstring,
|
|
|
|
METH_VARARGS,
|
|
|
|
pydoc_scanstring},
|
|
|
|
{NULL, NULL, 0, NULL}
|
|
|
|
};
|
|
|
|
|
|
|
|
PyDoc_STRVAR(module_doc,
|
|
|
|
"json speedups\n");
|
|
|
|
|
|
|
|
static struct PyModuleDef jsonmodule = {
|
2010-05-09 23:52:27 +08:00
|
|
|
PyModuleDef_HEAD_INIT,
|
|
|
|
"_json",
|
|
|
|
module_doc,
|
|
|
|
-1,
|
|
|
|
speedups_methods,
|
|
|
|
NULL,
|
|
|
|
NULL,
|
|
|
|
NULL,
|
|
|
|
NULL
|
2009-05-02 20:36:44 +08:00
|
|
|
};
|
|
|
|
|
2015-03-18 00:48:27 +08:00
|
|
|
PyMODINIT_FUNC
|
2009-05-02 20:36:44 +08:00
|
|
|
PyInit__json(void)
|
|
|
|
{
|
|
|
|
PyObject *m = PyModule_Create(&jsonmodule);
|
|
|
|
if (!m)
|
|
|
|
return NULL;
|
|
|
|
if (PyType_Ready(&PyScannerType) < 0)
|
|
|
|
goto fail;
|
|
|
|
if (PyType_Ready(&PyEncoderType) < 0)
|
|
|
|
goto fail;
|
|
|
|
Py_INCREF((PyObject*)&PyScannerType);
|
|
|
|
if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
|
|
|
|
Py_DECREF((PyObject*)&PyScannerType);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
Py_INCREF((PyObject*)&PyEncoderType);
|
|
|
|
if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
|
|
|
|
Py_DECREF((PyObject*)&PyEncoderType);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
return m;
|
|
|
|
fail:
|
|
|
|
Py_DECREF(m);
|
|
|
|
return NULL;
|
2008-05-08 22:29:10 +08:00
|
|
|
}
|