2020-04-23 06:29:27 +08:00
|
|
|
#include <Python.h>
|
2021-04-29 13:58:44 +08:00
|
|
|
#include "pycore_ast.h" // _PyAST_Validate(),
|
2020-04-23 06:29:27 +08:00
|
|
|
#include <errcode.h>
|
2020-06-12 00:30:46 +08:00
|
|
|
#include "tokenizer.h"
|
2020-04-23 06:29:27 +08:00
|
|
|
|
|
|
|
#include "pegen.h"
|
2020-06-12 00:30:46 +08:00
|
|
|
#include "string_parser.h"
|
2020-04-23 06:29:27 +08:00
|
|
|
|
2020-05-01 03:12:19 +08:00
|
|
|
PyObject *
|
2021-06-12 21:11:59 +08:00
|
|
|
_PyPegen_new_type_comment(Parser *p, const char *s)
|
2020-05-01 03:12:19 +08:00
|
|
|
{
|
|
|
|
PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
|
|
|
|
if (res == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2021-03-24 09:23:01 +08:00
|
|
|
if (_PyArena_AddPyObject(p->arena, res) < 0) {
|
2020-05-01 03:12:19 +08:00
|
|
|
Py_DECREF(res);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
arg_ty
|
|
|
|
_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
|
|
|
|
{
|
|
|
|
if (tc == NULL) {
|
|
|
|
return a;
|
|
|
|
}
|
2021-06-12 21:11:59 +08:00
|
|
|
const char *bytes = PyBytes_AsString(tc->bytes);
|
2020-05-01 03:12:19 +08:00
|
|
|
if (bytes == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
PyObject *tco = _PyPegen_new_type_comment(p, bytes);
|
|
|
|
if (tco == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_arg(a->arg, a->annotation, tco,
|
|
|
|
a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
|
|
|
|
p->arena);
|
2020-05-01 03:12:19 +08:00
|
|
|
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
static int
|
|
|
|
init_normalization(Parser *p)
|
|
|
|
{
|
2020-04-23 23:36:06 +08:00
|
|
|
if (p->normalize) {
|
|
|
|
return 1;
|
|
|
|
}
|
2020-04-23 06:29:27 +08:00
|
|
|
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
|
|
|
|
if (!m)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
p->normalize = PyObject_GetAttrString(m, "normalize");
|
|
|
|
Py_DECREF(m);
|
|
|
|
if (!p->normalize)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2020-04-28 01:02:07 +08:00
|
|
|
/* Checks if the NOTEQUAL token is valid given the current parser flags
|
|
|
|
0 indicates success and nonzero indicates failure (an exception may be set) */
|
|
|
|
int
|
2020-10-31 07:48:42 +08:00
|
|
|
_PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
|
2020-04-28 01:02:07 +08:00
|
|
|
assert(t->bytes != NULL);
|
|
|
|
assert(t->type == NOTEQUAL);
|
|
|
|
|
2021-06-12 21:11:59 +08:00
|
|
|
const char* tok_str = PyBytes_AS_STRING(t->bytes);
|
2020-06-15 21:23:43 +08:00
|
|
|
if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) {
|
2020-04-28 01:02:07 +08:00
|
|
|
RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
|
|
|
|
return -1;
|
2020-06-15 21:23:43 +08:00
|
|
|
}
|
|
|
|
if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
|
2020-04-28 01:02:07 +08:00
|
|
|
return strcmp(tok_str, "!=");
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
/* Decode identifier text `n` into an interned unicode object owned by the
   parser's arena, applying NFKC normalization to non-ASCII identifiers as
   the language spec requires.  Returns NULL and sets p->error_indicator
   on failure. */
PyObject *
_PyPegen_new_identifier(Parser *p, const char *n)
{
    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
    if (!id) {
        goto error;
    }
    /* PyUnicode_DecodeUTF8 should always return a ready string. */
    assert(PyUnicode_IS_READY(id));
    /* Check whether there are non-ASCII characters in the
       identifier; if so, normalize to NFKC. */
    if (!PyUnicode_IS_ASCII(id))
    {
        PyObject *id2;
        /* Lazily import unicodedata.normalize (cached on the parser). */
        if (!init_normalization(p))
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *form = PyUnicode_InternFromString("NFKC");
        if (form == NULL)
        {
            Py_DECREF(id);
            goto error;
        }
        /* Call p->normalize("NFKC", id). */
        PyObject *args[2] = {form, id};
        id2 = _PyObject_FastCall(p->normalize, args, 2);
        Py_DECREF(id);
        Py_DECREF(form);
        if (!id2) {
            goto error;
        }
        /* Defensive check: a replaced/broken unicodedata.normalize could
           return a non-string object. */
        if (!PyUnicode_Check(id2))
        {
            PyErr_Format(PyExc_TypeError,
                         "unicodedata.normalize() must return a string, not "
                         "%.200s",
                         _PyType_Name(Py_TYPE(id2)));
            Py_DECREF(id2);
            goto error;
        }
        id = id2;
    }
    /* Intern so identical identifiers share one object, then let the
       arena take ownership of the reference. */
    PyUnicode_InternInPlace(&id);
    if (_PyArena_AddPyObject(p->arena, id) < 0)
    {
        Py_DECREF(id);
        goto error;
    }
    return id;

error:
    /* Flag the parser so grammar rules know an exception is pending. */
    p->error_indicator = 1;
    return NULL;
}
|
|
|
|
|
|
|
|
// Build the empty-string placeholder identifier used by _PyPegen_dummy_name().
static PyObject *
_create_dummy_identifier(Parser *p)
{
    return _PyPegen_new_identifier(p, "");
}
|
|
|
|
|
|
|
|
static inline Py_ssize_t
|
2020-06-16 23:49:43 +08:00
|
|
|
byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
|
2020-04-23 06:29:27 +08:00
|
|
|
{
|
|
|
|
const char *str = PyUnicode_AsUTF8(line);
|
2020-04-23 23:36:06 +08:00
|
|
|
if (!str) {
|
|
|
|
return 0;
|
|
|
|
}
|
2021-03-23 00:24:39 +08:00
|
|
|
Py_ssize_t len = strlen(str);
|
2021-04-12 23:59:30 +08:00
|
|
|
if (col_offset > len + 1) {
|
|
|
|
col_offset = len + 1;
|
2021-03-23 00:24:39 +08:00
|
|
|
}
|
|
|
|
assert(col_offset >= 0);
|
2020-05-01 21:13:43 +08:00
|
|
|
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!text) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
|
|
|
|
Py_DECREF(text);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return a human-readable English name for the kind of expression `e`,
   used to build messages such as "cannot assign to <name>".
   Returns NULL (with SystemError set) for an unexpected kind. */
const char *
_PyPegen_get_expr_name(expr_ty e)
{
    assert(e != NULL);
    switch (e->kind) {
        case Attribute_kind:
            return "attribute";
        case Subscript_kind:
            return "subscript";
        case Starred_kind:
            return "starred";
        case Name_kind:
            return "name";
        case List_kind:
            return "list";
        case Tuple_kind:
            return "tuple";
        case Lambda_kind:
            return "lambda";
        case Call_kind:
            return "function call";
        /* All operator expressions share one generic label. */
        case BoolOp_kind:
        case BinOp_kind:
        case UnaryOp_kind:
            return "expression";
        case GeneratorExp_kind:
            return "generator expression";
        case Yield_kind:
        case YieldFrom_kind:
            return "yield expression";
        case Await_kind:
            return "await expression";
        case ListComp_kind:
            return "list comprehension";
        case SetComp_kind:
            return "set comprehension";
        case DictComp_kind:
            return "dict comprehension";
        case Dict_kind:
            return "dict literal";
        case Set_kind:
            return "set display";
        case JoinedStr_kind:
        case FormattedValue_kind:
            return "f-string expression";
        case Constant_kind: {
            /* Singletons get their own names; everything else is a literal. */
            PyObject *value = e->v.Constant.value;
            if (value == Py_None) {
                return "None";
            }
            if (value == Py_False) {
                return "False";
            }
            if (value == Py_True) {
                return "True";
            }
            if (value == Py_Ellipsis) {
                return "ellipsis";
            }
            return "literal";
        }
        case Compare_kind:
            return "comparison";
        case IfExp_kind:
            return "conditional expression";
        case NamedExpr_kind:
            return "named expression";
        default:
            PyErr_Format(PyExc_SystemError,
                         "unexpected expression in assignment %d (line %d)",
                         e->kind, e->lineno);
            return NULL;
    }
}
|
|
|
|
|
2020-04-23 23:36:06 +08:00
|
|
|
/* Convert a pending Unicode/Value error from the tokenizer into a
   SyntaxError of the form "(<errtype>) <message>".  Always returns -1
   so callers can propagate failure directly. */
static int
raise_decode_error(Parser *p)
{
    assert(PyErr_Occurred());
    const char *errtype = NULL;
    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
        errtype = "unicode error";
    }
    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
        errtype = "value error";
    }
    if (errtype) {
        PyObject *type;
        PyObject *value;
        PyObject *tback;
        PyObject *errstr;
        /* Take ownership of the pending exception so its text can be
           embedded in the new SyntaxError. */
        PyErr_Fetch(&type, &value, &tback);
        errstr = PyObject_Str(value);
        if (errstr) {
            RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
            Py_DECREF(errstr);
        }
        else {
            /* str() of the old value failed; clear that secondary error
               and raise a generic message instead. */
            PyErr_Clear();
            RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
        }
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(tback);
    }

    return -1;
}
|
|
|
|
|
2021-01-20 07:59:33 +08:00
|
|
|
/* Report "'<c>' was never closed" at the position of the innermost open
   bracket recorded by the tokenizer's bracket stack (level is the depth). */
static inline void
raise_unclosed_parentheses_error(Parser *p) {
    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
    int error_col = p->tok->parencolstack[p->tok->level-1];
    // end column -1: highlight from the bracket to the end of the line.
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
                               error_lineno, error_col, error_lineno, -1,
                               "'%c' was never closed",
                               p->tok->parenstack[p->tok->level-1]);
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
static void
|
|
|
|
raise_tokenizer_init_error(PyObject *filename)
|
|
|
|
{
|
|
|
|
if (!(PyErr_ExceptionMatches(PyExc_LookupError)
|
2021-06-15 00:46:11 +08:00
|
|
|
|| PyErr_ExceptionMatches(PyExc_SyntaxError)
|
2020-04-23 06:29:27 +08:00
|
|
|
|| PyErr_ExceptionMatches(PyExc_ValueError)
|
|
|
|
|| PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
|
|
|
|
return;
|
|
|
|
}
|
2020-04-23 23:36:06 +08:00
|
|
|
PyObject *errstr = NULL;
|
|
|
|
PyObject *tuple = NULL;
|
2020-06-15 21:23:43 +08:00
|
|
|
PyObject *type;
|
|
|
|
PyObject *value;
|
|
|
|
PyObject *tback;
|
2020-04-23 06:29:27 +08:00
|
|
|
PyErr_Fetch(&type, &value, &tback);
|
|
|
|
errstr = PyObject_Str(value);
|
2020-04-23 23:36:06 +08:00
|
|
|
if (!errstr) {
|
|
|
|
goto error;
|
|
|
|
}
|
2020-04-23 06:29:27 +08:00
|
|
|
|
2020-04-23 23:36:06 +08:00
|
|
|
PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!tmp) {
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2020-04-23 23:36:06 +08:00
|
|
|
tuple = PyTuple_Pack(2, errstr, tmp);
|
2020-04-23 06:29:27 +08:00
|
|
|
Py_DECREF(tmp);
|
|
|
|
if (!value) {
|
|
|
|
goto error;
|
|
|
|
}
|
2020-04-23 23:36:06 +08:00
|
|
|
PyErr_SetObject(PyExc_SyntaxError, tuple);
|
2020-04-23 06:29:27 +08:00
|
|
|
|
|
|
|
error:
|
|
|
|
Py_XDECREF(type);
|
|
|
|
Py_XDECREF(value);
|
|
|
|
Py_XDECREF(tback);
|
2020-04-23 23:36:06 +08:00
|
|
|
Py_XDECREF(errstr);
|
|
|
|
Py_XDECREF(tuple);
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Translate the tokenizer's `done` status code into a Python exception
   (SyntaxError / IndentationError / TabError / KeyboardInterrupt / ...).
   Always returns -1. */
static int
tokenizer_error(Parser *p)
{
    /* If an exception is already pending, just propagate it. */
    if (PyErr_Occurred()) {
        return -1;
    }

    const char *msg = NULL;
    PyObject* errtype = PyExc_SyntaxError;
    Py_ssize_t col_offset = -1;
    switch (p->tok->done) {
        case E_TOKEN:
            msg = "invalid token";
            break;
        case E_EOF:
            /* An unclosed bracket gets a more specific message. */
            if (p->tok->level) {
                raise_unclosed_parentheses_error(p);
            } else {
                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
            }
            return -1;
        case E_DEDENT:
            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
            return -1;
        case E_INTR:
            if (!PyErr_Occurred()) {
                PyErr_SetNone(PyExc_KeyboardInterrupt);
            }
            return -1;
        case E_NOMEM:
            PyErr_NoMemory();
            return -1;
        case E_TABSPACE:
            errtype = PyExc_TabError;
            msg = "inconsistent use of tabs and spaces in indentation";
            break;
        case E_TOODEEP:
            errtype = PyExc_IndentationError;
            msg = "too many levels of indentation";
            break;
        case E_LINECONT:
            /* Point at the last character of the first line.
               NOTE(review): strtok mutates p->tok->buf in place and is not
               reentrant — presumably the buffer is not reused after this
               error path; confirm. */
            col_offset = strlen(strtok(p->tok->buf, "\n")) - 1;
            msg = "unexpected character after line continuation character";
            break;
        default:
            msg = "unknown parsing error";
    }

    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg);
    return -1;
}
|
|
|
|
|
|
|
|
/* Raise `errtype` at the position of the parser's current error token
   (p->known_err_token if set, otherwise the last token read).  `errmsg`
   is a printf-style format consumed with the trailing varargs.  Always
   returns NULL so callers can `return _PyPegen_raise_error(...)`. */
void *
_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
{
    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
    Py_ssize_t col_offset;
    Py_ssize_t end_col_offset = -1;
    /* Tokens store 0-based offsets; SyntaxError wants 1-based columns,
       hence the +1 conversions below. */
    if (t->col_offset == -1) {
        /* No recorded offset: fall back to the tokenizer's current
           position in the input buffer. */
        col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
                                      intptr_t, int);
    } else {
        col_offset = t->col_offset + 1;
    }

    if (t->end_col_offset != -1) {
        end_col_offset = t->end_col_offset + 1;
    }

    va_list va;
    va_start(va, errmsg);
    _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
    va_end(va);

    return NULL;
}
|
|
|
|
|
2021-01-15 05:36:30 +08:00
|
|
|
/* Return line `lineno` (1-based) of the in-memory source as a new unicode
   object, or NULL on decode failure. */
static PyObject *
get_error_line(Parser *p, Py_ssize_t lineno)
{
    /* If the file descriptor is interactive, the source lines of the current
     * (multi-line) statement are stored in p->tok->interactive_src_start.
     * If not, we're parsing from a string, which means that the whole source
     * is stored in p->tok->str. */
    assert(p->tok->fp == NULL || p->tok->fp == stdin);

    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;

    /* Skip lineno-1 newlines to reach the requested line.
       NOTE(review): assumes lineno never exceeds the number of lines in
       the buffer — strchr returning NULL here would make cur_line invalid;
       verify against callers. */
    for (int i = 0; i < lineno - 1; i++) {
        cur_line = strchr(cur_line, '\n') + 1;
    }

    char *next_newline;
    if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
        next_newline = cur_line + strlen(cur_line);
    }
    /* Decode just this line; "replace" tolerates malformed UTF-8. */
    return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
}
|
|
|
|
|
2020-05-14 03:36:27 +08:00
|
|
|
/* Core error-raising routine: build and set an exception of `errtype`
   whose value is (message, (filename, lineno, col, text, end_lineno,
   end_col)).  Offsets are byte-based on entry and converted to character
   offsets when the source uses a non-UTF-8 encoding.  Always returns NULL. */
void *
_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                    Py_ssize_t lineno, Py_ssize_t col_offset,
                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
                                    const char *errmsg, va_list va)
{
    PyObject *value = NULL;
    PyObject *errstr = NULL;
    PyObject *error_line = NULL;
    PyObject *tmp = NULL;
    /* Mark the parser as failed before anything else can bail out. */
    p->error_indicator = 1;

    /* CURRENT_POS sentinels mean "use the tokenizer's current position". */
    if (end_lineno == CURRENT_POS) {
        end_lineno = p->tok->lineno;
    }
    if (end_col_offset == CURRENT_POS) {
        end_col_offset = p->tok->cur - p->tok->line_start;
    }

    /* For f-string sub-parses, prefix the message with "f-string: " in a
       heap buffer (freed before every return below). */
    if (p->start_rule == Py_fstring_input) {
        const char *fstring_msg = "f-string: ";
        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);

        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
        if (!new_errmsg) {
            return (void *) PyErr_NoMemory();
        }

        // Copy both strings into new buffer
        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
        new_errmsg[len] = 0;
        errmsg = new_errmsg;
    }
    errstr = PyUnicode_FromFormatV(errmsg, va);
    if (!errstr) {
        goto error;
    }

    // PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
    // with an arbitrary encoding or otherwise we could get some badly decoded text.
    int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
    if (p->tok->fp_interactive) {
        error_line = get_error_line(p, lineno);
    }
    else if (uses_utf8_codec && p->start_rule == Py_file_input) {
        error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
    }

    if (!error_line) {
        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
           then we need to find the error line from some other source, because
           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
           failed or we're parsing from a string or the REPL. There's a third edge case where
           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
           does not physically exist */
        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);

        if (p->tok->lineno <= lineno) {
            /* Error is at/past the last tokenized line: show the whole
               remaining buffer. */
            Py_ssize_t size = p->tok->inp - p->tok->buf;
            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
        }
        else {
            error_line = get_error_line(p, lineno);
        }
        if (!error_line) {
            goto error;
        }
    }

    /* F-string offsets are relative to the enclosing string literal. */
    if (p->start_rule == Py_fstring_input) {
        col_offset -= p->starting_col_offset;
        end_col_offset -= p->starting_col_offset;
    }

    Py_ssize_t col_number = col_offset;
    Py_ssize_t end_col_number = end_col_offset;

    /* A non-default encoding means byte offsets need translating into
       character offsets for the caret display. */
    if (p->tok->encoding != NULL) {
        col_number = byte_offset_to_character_offset(error_line, col_offset);
        end_col_number = end_col_number > 0 ?
                         byte_offset_to_character_offset(error_line, end_col_offset) :
                         end_col_number;
    }
    /* "N" steals the error_line reference on success. */
    tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
    if (!tmp) {
        goto error;
    }
    value = PyTuple_Pack(2, errstr, tmp);
    Py_DECREF(tmp);
    if (!value) {
        goto error;
    }
    PyErr_SetObject(errtype, value);

    Py_DECREF(errstr);
    Py_DECREF(value);
    if (p->start_rule == Py_fstring_input) {
        PyMem_Free((void *)errmsg);
    }
    return NULL;

error:
    Py_XDECREF(errstr);
    Py_XDECREF(error_line);
    if (p->start_rule == Py_fstring_input) {
        PyMem_Free((void *)errmsg);
    }
    return NULL;
}
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
static const char *
|
|
|
|
token_name(int type)
|
|
|
|
{
|
|
|
|
if (0 <= type && type <= N_TOKENS) {
|
|
|
|
return _PyParser_TokenNames[type];
|
|
|
|
}
|
|
|
|
return "<Huh?>";
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Here, mark is the start of the node, while p->mark is the end.
|
|
|
|
// If node==NULL, they should be the same.
|
|
|
|
int
|
|
|
|
_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
|
|
|
|
{
|
|
|
|
// Insert in front
|
2021-03-24 09:23:01 +08:00
|
|
|
Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo));
|
2020-04-23 06:29:27 +08:00
|
|
|
if (m == NULL) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
m->type = type;
|
|
|
|
m->node = node;
|
|
|
|
m->mark = p->mark;
|
|
|
|
m->next = p->tokens[mark]->memo;
|
|
|
|
p->tokens[mark]->memo = m;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Like _PyPegen_insert_memo(), but updates an existing node if found.
|
|
|
|
int
|
|
|
|
_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
|
|
|
|
{
|
|
|
|
for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
|
|
|
|
if (m->type == type) {
|
|
|
|
// Update existing node.
|
|
|
|
m->node = node;
|
|
|
|
m->mark = p->mark;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Insert new node.
|
|
|
|
return _PyPegen_insert_memo(p, mark, type, node);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return dummy NAME.
void *
_PyPegen_dummy_name(Parser *p, ...)
{
    // The dummy node is cached in a function-static pointer, so it is
    // created at most once per process.
    // NOTE(review): the cached node is allocated in the arena of the first
    // parser that built it — presumably that arena outlives later uses;
    // confirm against arena lifetime.
    static void *cache = NULL;

    if (cache != NULL) {
        return cache;
    }

    PyObject *id = _create_dummy_identifier(p);
    if (!id) {
        return NULL;
    }
    cache = _PyAST_Name(id, Load, 1, 0, 1, 0, p->arena);
    return cache;
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
|
|
|
|
{
|
2020-07-07 06:42:21 +08:00
|
|
|
assert(name_len > 0);
|
2020-07-07 03:31:16 +08:00
|
|
|
if (name_len >= p->n_keyword_lists ||
|
|
|
|
p->keywords[name_len] == NULL ||
|
|
|
|
p->keywords[name_len]->type == -1) {
|
2020-04-23 06:29:27 +08:00
|
|
|
return NAME;
|
|
|
|
}
|
2020-07-07 03:31:16 +08:00
|
|
|
for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) {
|
2020-04-23 06:29:27 +08:00
|
|
|
if (strncmp(k->str, name, name_len) == 0) {
|
|
|
|
return k->type;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NAME;
|
|
|
|
}
|
|
|
|
|
2020-05-01 03:12:19 +08:00
|
|
|
static int
|
|
|
|
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
|
|
|
|
assert(initial_size > 0);
|
|
|
|
arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
|
|
|
|
arr->size = initial_size;
|
|
|
|
arr->num_items = 0;
|
|
|
|
|
|
|
|
return arr->items != NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
|
|
|
|
if (arr->num_items >= arr->size) {
|
|
|
|
size_t new_size = arr->size * 2;
|
|
|
|
void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
|
|
|
|
if (!new_items_array) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
arr->items = new_items_array;
|
|
|
|
arr->size = new_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
arr->items[arr->num_items].lineno = lineno;
|
|
|
|
arr->items[arr->num_items].comment = comment; // Take ownership
|
|
|
|
arr->num_items++;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Free every owned comment string, then the item buffer itself.
static void
growable_comment_array_deallocate(growable_comment_array *arr) {
    for (unsigned i = 0; i < arr->num_items; i++) {
        PyMem_Free(arr->items[i].comment);
    }
    PyMem_Free(arr->items);
}
|
|
|
|
|
2021-04-09 08:32:25 +08:00
|
|
|
// Fill in `token` (the next slot of p->tokens) from the raw text [start, end)
// produced by the tokenizer, resolving keywords and recording source
// locations.  Returns 0 on success, -1 on error (including error tokens).
static int
initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
    assert(token != NULL);

    // NAME tokens may actually be hard keywords; everything else keeps its type.
    token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
    token->bytes = PyBytes_FromStringAndSize(start, end - start);
    if (token->bytes == NULL) {
        return -1;
    }

    // The arena owns the bytes object from here on.
    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
        Py_DECREF(token->bytes);
        return -1;
    }

    // STRING tokens may span several lines, so their start position comes
    // from the tokenizer's multi-line bookkeeping; end position always uses
    // the current line.
    const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
    int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
    int end_lineno = p->tok->lineno;

    // -1 marks an unknown column (start/end missing or before the line start).
    int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
    int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;

    // starting_lineno/starting_col_offset shift locations when parsing an
    // embedded fragment; the column shift only applies on the first line.
    token->lineno = p->starting_lineno + lineno;
    token->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
    token->end_lineno = p->starting_lineno + end_lineno;
    token->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;

    p->fill += 1;

    // Decode errors get their own, more specific exception.
    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
        return raise_decode_error(p);
    }

    return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
}
|
|
|
|
|
|
|
|
// Double the capacity of p->tokens, pre-allocating a zeroed Token for every
// new slot.  Returns 0 on success and -1 (with MemoryError set) on failure.
static int
_resize_tokens_array(Parser *p) {
    int newsize = p->size * 2;
    Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
    if (new_tokens == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    p->tokens = new_tokens;

    for (int i = p->size; i < newsize; i++) {
        p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
        if (p->tokens[i] == NULL) {
            // Record how many slots are actually populated so that
            // _PyPegen_Parser_Free only frees valid pointers.
            p->size = i; // Needed, in order to cleanup correctly after parser fails
            PyErr_NoMemory();
            return -1;
        }
    }
    p->size = newsize;
    return 0;
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
int
|
|
|
|
_PyPegen_fill_token(Parser *p)
|
|
|
|
{
|
2020-06-15 21:23:43 +08:00
|
|
|
const char *start;
|
|
|
|
const char *end;
|
2020-04-23 06:29:27 +08:00
|
|
|
int type = PyTokenizer_Get(p->tok, &start, &end);
|
2020-05-01 03:12:19 +08:00
|
|
|
|
|
|
|
// Record and skip '# type: ignore' comments
|
|
|
|
while (type == TYPE_IGNORE) {
|
|
|
|
Py_ssize_t len = end - start;
|
|
|
|
char *tag = PyMem_Malloc(len + 1);
|
|
|
|
if (tag == NULL) {
|
|
|
|
PyErr_NoMemory();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
strncpy(tag, start, len);
|
|
|
|
tag[len] = '\0';
|
|
|
|
// Ownership of tag passes to the growable array
|
|
|
|
if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
|
|
|
|
PyErr_NoMemory();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
type = PyTokenizer_Get(p->tok, &start, &end);
|
|
|
|
}
|
|
|
|
|
2021-04-09 08:32:25 +08:00
|
|
|
// If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
|
|
|
|
if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
|
2020-04-23 06:29:27 +08:00
|
|
|
type = NEWLINE; /* Add an extra newline */
|
|
|
|
p->parsing_started = 0;
|
|
|
|
|
2020-04-28 01:35:58 +08:00
|
|
|
if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
|
2020-04-23 06:29:27 +08:00
|
|
|
p->tok->pendin = -p->tok->indent;
|
|
|
|
p->tok->indent = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
p->parsing_started = 1;
|
|
|
|
}
|
|
|
|
|
2021-04-09 08:32:25 +08:00
|
|
|
// Check if we are at the limit of the token array capacity and resize if needed
|
|
|
|
if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
|
2021-04-09 07:05:44 +08:00
|
|
|
return -1;
|
|
|
|
}
|
2020-04-23 06:29:27 +08:00
|
|
|
|
2021-04-09 08:32:25 +08:00
|
|
|
Token *t = p->tokens[p->fill];
|
|
|
|
return initialize_token(p, t, start, end, type);
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
2021-04-09 08:17:31 +08:00
|
|
|
|
|
|
|
#if defined(Py_DEBUG)
|
2020-04-23 06:29:27 +08:00
|
|
|
// Instrumentation to count the effectiveness of memoization.
|
|
|
|
// The array counts the number of tokens skipped by memoization,
|
|
|
|
// indexed by type.
|
|
|
|
|
|
|
|
#define NSTATISTICS 2000
|
|
|
|
static long memo_statistics[NSTATISTICS];
|
|
|
|
|
|
|
|
void
|
|
|
|
_PyPegen_clear_memo_statistics()
|
|
|
|
{
|
|
|
|
for (int i = 0; i < NSTATISTICS; i++) {
|
|
|
|
memo_statistics[i] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
|
|
|
_PyPegen_get_memo_statistics()
|
|
|
|
{
|
|
|
|
PyObject *ret = PyList_New(NSTATISTICS);
|
|
|
|
if (ret == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
for (int i = 0; i < NSTATISTICS; i++) {
|
|
|
|
PyObject *value = PyLong_FromLong(memo_statistics[i]);
|
|
|
|
if (value == NULL) {
|
|
|
|
Py_DECREF(ret);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
// PyList_SetItem borrows a reference to value.
|
|
|
|
if (PyList_SetItem(ret, i, value) < 0) {
|
|
|
|
Py_DECREF(ret);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
2021-04-09 08:17:31 +08:00
|
|
|
#endif
|
2020-04-23 06:29:27 +08:00
|
|
|
|
|
|
|
int // bool
|
|
|
|
_PyPegen_is_memoized(Parser *p, int type, void *pres)
|
|
|
|
{
|
|
|
|
if (p->mark == p->fill) {
|
|
|
|
if (_PyPegen_fill_token(p) < 0) {
|
2020-04-23 23:36:06 +08:00
|
|
|
p->error_indicator = 1;
|
2020-04-23 06:29:27 +08:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Token *t = p->tokens[p->mark];
|
|
|
|
|
|
|
|
for (Memo *m = t->memo; m != NULL; m = m->next) {
|
|
|
|
if (m->type == type) {
|
2021-04-09 08:17:31 +08:00
|
|
|
#if defined(PY_DEBUG)
|
2020-04-23 06:29:27 +08:00
|
|
|
if (0 <= type && type < NSTATISTICS) {
|
|
|
|
long count = m->mark - p->mark;
|
|
|
|
// A memoized negative result counts for one.
|
|
|
|
if (count <= 0) {
|
|
|
|
count = 1;
|
|
|
|
}
|
|
|
|
memo_statistics[type] += count;
|
|
|
|
}
|
2021-04-09 08:17:31 +08:00
|
|
|
#endif
|
2020-04-23 06:29:27 +08:00
|
|
|
p->mark = m->mark;
|
|
|
|
*(void **)(pres) = m->node;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-04-23 19:42:13 +08:00
|
|
|
int
|
|
|
|
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
|
|
|
|
{
|
|
|
|
int mark = p->mark;
|
|
|
|
void *res = func(p);
|
|
|
|
p->mark = mark;
|
|
|
|
return (res != NULL) == positive;
|
|
|
|
}
|
|
|
|
|
2020-05-27 07:15:52 +08:00
|
|
|
int
|
|
|
|
_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
|
|
|
|
{
|
|
|
|
int mark = p->mark;
|
|
|
|
void *res = func(p, arg);
|
|
|
|
p->mark = mark;
|
|
|
|
return (res != NULL) == positive;
|
|
|
|
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
int
|
|
|
|
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
|
|
|
|
{
|
|
|
|
int mark = p->mark;
|
|
|
|
void *res = func(p, arg);
|
|
|
|
p->mark = mark;
|
|
|
|
return (res != NULL) == positive;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
|
|
|
|
{
|
|
|
|
int mark = p->mark;
|
2020-04-23 19:42:13 +08:00
|
|
|
void *res = (void*)func(p);
|
2020-04-23 06:29:27 +08:00
|
|
|
p->mark = mark;
|
|
|
|
return (res != NULL) == positive;
|
|
|
|
}
|
|
|
|
|
|
|
|
Token *
|
|
|
|
_PyPegen_expect_token(Parser *p, int type)
|
|
|
|
{
|
|
|
|
if (p->mark == p->fill) {
|
|
|
|
if (_PyPegen_fill_token(p) < 0) {
|
2020-04-23 23:36:06 +08:00
|
|
|
p->error_indicator = 1;
|
2020-04-23 06:29:27 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Token *t = p->tokens[p->mark];
|
|
|
|
if (t->type != type) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
p->mark += 1;
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
2021-02-03 03:54:22 +08:00
|
|
|
Token *
|
|
|
|
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
|
|
|
|
|
|
|
|
if (p->error_indicator == 1) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (p->mark == p->fill) {
|
|
|
|
if (_PyPegen_fill_token(p) < 0) {
|
|
|
|
p->error_indicator = 1;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Token *t = p->tokens[p->mark];
|
|
|
|
if (t->type != type) {
|
|
|
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
p->mark += 1;
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
2020-05-27 01:58:44 +08:00
|
|
|
// Consume the next token as a Name expression if it is a NAME whose text
// equals `keyword` exactly; otherwise return NULL without consuming it.
// NULL with p->error_indicator set signals a real error.
expr_ty
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
{
    if (p->mark == p->fill) {
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return NULL;
        }
    }
    Token *t = p->tokens[p->mark];
    // Soft keywords are ordinary NAME tokens; any other type cannot match.
    if (t->type != NAME) {
        return NULL;
    }
    const char *s = PyBytes_AsString(t->bytes);
    if (!s) {
        p->error_indicator = 1;
        return NULL;
    }
    // Exact string comparison: prefixes do not match.
    if (strcmp(s, keyword) != 0) {
        return NULL;
    }
    // Re-reads the same token and consumes it, building the Name node.
    return _PyPegen_name_token(p);
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
Token *
|
|
|
|
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
|
|
|
|
{
|
|
|
|
assert(p->mark >= 0);
|
|
|
|
Token *token = NULL;
|
|
|
|
for (int m = p->mark - 1; m >= 0; m--) {
|
|
|
|
token = p->tokens[m];
|
|
|
|
if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return token;
|
|
|
|
}
|
|
|
|
|
2021-06-10 05:20:01 +08:00
|
|
|
// Build a Name AST node (Load context) from an already-consumed NAME
// token.  Returns NULL (setting the error indicator where applicable) on
// failure; a NULL token is passed through as NULL.
static expr_ty
_PyPegen_name_from_token(Parser *p, Token* t)
{
    if (t == NULL) {
        return NULL;
    }
    const char *s = PyBytes_AsString(t->bytes);
    if (!s) {
        p->error_indicator = 1;
        return NULL;
    }
    // Interns and NFKC-normalizes the identifier; result is arena-owned.
    PyObject *id = _PyPegen_new_identifier(p, s);
    if (id == NULL) {
        p->error_indicator = 1;
        return NULL;
    }
    return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
                       t->end_col_offset, p->arena);
}
|
|
|
|
|
2021-06-10 05:20:01 +08:00
|
|
|
|
|
|
|
expr_ty
|
|
|
|
_PyPegen_name_token(Parser *p)
|
|
|
|
{
|
|
|
|
Token *t = _PyPegen_expect_token(p, NAME);
|
|
|
|
return _PyPegen_name_from_token(p, t);
|
|
|
|
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
void *
|
|
|
|
_PyPegen_string_token(Parser *p)
|
|
|
|
{
|
|
|
|
return _PyPegen_expect_token(p, STRING);
|
|
|
|
}
|
|
|
|
|
2021-04-16 04:38:45 +08:00
|
|
|
|
|
|
|
// Consume the next token as a Name node if it is a NAME whose text equals
// one of the parser's soft keywords; NULL otherwise (or on error).
expr_ty _PyPegen_soft_keyword_token(Parser *p) {
    Token *t = _PyPegen_expect_token(p, NAME);
    if (t == NULL) {
        return NULL;
    }
    char *the_token;
    Py_ssize_t size;
    // BUGFIX: the return value was previously ignored; a failure would have
    // left the_token/size uninitialized.
    if (PyBytes_AsStringAndSize(t->bytes, &the_token, &size) < 0) {
        p->error_indicator = 1;
        return NULL;
    }
    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
        // BUGFIX: require an exact match.  strncmp bounded by the token
        // length alone would also accept a NAME that is merely a prefix of
        // a soft keyword (e.g. "mat" matching "match").
        if (strncmp(*keyword, the_token, size) == 0 && (*keyword)[size] == '\0') {
            return _PyPegen_name_from_token(p, t);
        }
    }
    return NULL;
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
// Convert the text of a NUMBER token (without underscores) into a Python
// int, float, or complex object.  Returns a new reference, or NULL with an
// exception set.
static PyObject *
parsenumber_raw(const char *s)
{
    const char *end;
    long x;
    double dx;
    Py_complex compl;
    int imflag;

    assert(s != NULL);
    errno = 0;
    end = s + strlen(s) - 1;
    // Trailing 'j'/'J' marks an imaginary literal.
    imflag = *end == 'j' || *end == 'J';
    if (s[0] == '0') {
        // Leading zero: parse with base auto-detection (0x/0o/0b).
        x = (long)PyOS_strtoul(s, (char **)&end, 0);
        // Unsigned value overflowed into the sign bit: fall back to an
        // arbitrary-precision int.
        if (x < 0 && errno == 0) {
            return PyLong_FromString(s, (char **)0, 0);
        }
    }
    else {
        x = PyOS_strtol(s, (char **)&end, 0);
    }
    // The whole string was consumed: it is an integer literal.
    if (*end == '\0') {
        if (errno != 0) {
            // Out of `long` range: use an arbitrary-precision int.
            return PyLong_FromString(s, (char **)0, 0);
        }
        return PyLong_FromLong(x);
    }
    /* XXX Huge floats may silently fail */
    if (imflag) {
        compl.real = 0.;
        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
        if (compl.imag == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
        return PyComplex_FromCComplex(compl);
    }
    dx = PyOS_string_to_double(s, NULL, NULL);
    if (dx == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    return PyFloat_FromDouble(dx);
}
|
|
|
|
|
|
|
|
/* Parse a NUMBER token's text into a Python number object, first stripping
   any PEP 515 underscore separators.  Returns a new reference or NULL with
   an exception set. */
static PyObject *
parsenumber(const char *s)
{
    assert(s != NULL);

    if (strchr(s, '_') == NULL) {
        return parsenumber_raw(s);
    }

    /* Build an underscore-free copy and parse that instead. */
    char *stripped = PyMem_Malloc(strlen(s) + 1);
    if (stripped == NULL) {
        return PyErr_NoMemory();
    }
    char *out = stripped;
    for (const char *in = s; *in; in++) {
        if (*in != '_') {
            *out++ = *in;
        }
    }
    *out = '\0';

    PyObject *res = parsenumber_raw(stripped);
    PyMem_Free(stripped);
    return res;
}
|
|
|
|
|
|
|
|
// Consume the next token as a NUMBER and build a Constant AST node holding
// the parsed value.  Returns NULL with p->error_indicator set on failure.
expr_ty
_PyPegen_number_token(Parser *p)
{
    Token *t = _PyPegen_expect_token(p, NUMBER);
    if (t == NULL) {
        return NULL;
    }

    const char *num_raw = PyBytes_AsString(t->bytes);
    if (num_raw == NULL) {
        p->error_indicator = 1;
        return NULL;
    }

    // PEP 515 underscores require feature_version >= 6 (Python 3.6).
    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
        p->error_indicator = 1;
        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
                                  "in Python 3.6 and greater");
    }

    PyObject *c = parsenumber(num_raw);

    if (c == NULL) {
        p->error_indicator = 1;
        return NULL;
    }

    // The arena takes ownership of the constant from here on.
    if (_PyArena_AddPyObject(p->arena, c) < 0) {
        Py_DECREF(c);
        p->error_indicator = 1;
        return NULL;
    }

    return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
                           t->end_col_offset, p->arena);
}
|
|
|
|
|
2020-04-29 09:42:27 +08:00
|
|
|
static int // bool
|
|
|
|
newline_in_string(Parser *p, const char *cur)
|
|
|
|
{
|
2020-06-06 07:52:27 +08:00
|
|
|
for (const char *c = cur; c >= p->tok->buf; c--) {
|
|
|
|
if (*c == '\'' || *c == '"') {
|
2020-04-29 09:42:27 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check that the source for a single input statement really is a single
   statement by looking at what is left in the buffer after parsing.
   Trailing whitespace and comments are OK. */
static int // bool
bad_single_statement(Parser *p)
{
    // Find the first newline; anything after it must be ignorable.
    const char *cur = strchr(p->tok->buf, '\n');

    /* Newlines are allowed if preceded by a line continuation character
       or if they appear inside a string. */
    if (!cur || (cur != p->tok->buf && *(cur - 1) == '\\')
        || newline_in_string(p, cur)) {
        return 0;
    }
    char c = *cur;

    // Accept only runs of whitespace and '#' comments up to end of buffer.
    for (;;) {
        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
            c = *++cur;
        }

        // Reached the terminating NUL: nothing but whitespace/comments left.
        if (!c) {
            return 0;
        }

        // Any other character means a second statement follows.
        if (c != '#') {
            return 1;
        }

        /* Suck up comment. */
        while (c && c != '\n') {
            c = *++cur;
        }
    }
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
void
|
|
|
|
_PyPegen_Parser_Free(Parser *p)
|
|
|
|
{
|
|
|
|
Py_XDECREF(p->normalize);
|
|
|
|
for (int i = 0; i < p->size; i++) {
|
|
|
|
PyMem_Free(p->tokens[i]);
|
|
|
|
}
|
|
|
|
PyMem_Free(p->tokens);
|
2020-05-01 03:12:19 +08:00
|
|
|
growable_comment_array_deallocate(&p->type_ignore_comments);
|
2020-04-23 06:29:27 +08:00
|
|
|
PyMem_Free(p);
|
|
|
|
}
|
|
|
|
|
2020-04-28 01:02:07 +08:00
|
|
|
static int
|
|
|
|
compute_parser_flags(PyCompilerFlags *flags)
|
|
|
|
{
|
|
|
|
int parser_flags = 0;
|
|
|
|
if (!flags) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
|
|
|
|
parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
|
|
|
|
}
|
|
|
|
if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
|
|
|
|
parser_flags |= PyPARSE_IGNORE_COOKIE;
|
|
|
|
}
|
|
|
|
if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
|
|
|
|
parser_flags |= PyPARSE_BARRY_AS_BDFL;
|
|
|
|
}
|
|
|
|
if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
|
|
|
|
parser_flags |= PyPARSE_TYPE_COMMENTS;
|
|
|
|
}
|
2020-06-28 08:33:49 +08:00
|
|
|
if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
|
2020-05-01 11:27:52 +08:00
|
|
|
parser_flags |= PyPARSE_ASYNC_HACKS;
|
|
|
|
}
|
2020-04-28 01:02:07 +08:00
|
|
|
return parser_flags;
|
|
|
|
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
Parser *
|
2020-04-28 01:02:07 +08:00
|
|
|
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
|
2020-05-01 11:27:52 +08:00
|
|
|
int feature_version, int *errcode, PyArena *arena)
|
2020-04-23 06:29:27 +08:00
|
|
|
{
|
|
|
|
Parser *p = PyMem_Malloc(sizeof(Parser));
|
|
|
|
if (p == NULL) {
|
2020-04-23 23:36:06 +08:00
|
|
|
return (Parser *) PyErr_NoMemory();
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
assert(tok != NULL);
|
2020-05-02 00:42:32 +08:00
|
|
|
tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
|
|
|
|
tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
|
2020-04-23 06:29:27 +08:00
|
|
|
p->tok = tok;
|
|
|
|
p->keywords = NULL;
|
|
|
|
p->n_keyword_lists = -1;
|
2021-04-16 04:38:45 +08:00
|
|
|
p->soft_keywords = NULL;
|
2020-04-23 06:29:27 +08:00
|
|
|
p->tokens = PyMem_Malloc(sizeof(Token *));
|
|
|
|
if (!p->tokens) {
|
|
|
|
PyMem_Free(p);
|
2020-04-23 23:36:06 +08:00
|
|
|
return (Parser *) PyErr_NoMemory();
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
2020-05-01 03:12:19 +08:00
|
|
|
p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
|
2020-04-23 23:36:06 +08:00
|
|
|
if (!p->tokens) {
|
|
|
|
PyMem_Free(p->tokens);
|
|
|
|
PyMem_Free(p);
|
|
|
|
return (Parser *) PyErr_NoMemory();
|
|
|
|
}
|
2020-05-01 03:12:19 +08:00
|
|
|
if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
|
|
|
|
PyMem_Free(p->tokens[0]);
|
|
|
|
PyMem_Free(p->tokens);
|
|
|
|
PyMem_Free(p);
|
|
|
|
return (Parser *) PyErr_NoMemory();
|
|
|
|
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
p->mark = 0;
|
|
|
|
p->fill = 0;
|
|
|
|
p->size = 1;
|
|
|
|
|
|
|
|
p->errcode = errcode;
|
|
|
|
p->arena = arena;
|
|
|
|
p->start_rule = start_rule;
|
|
|
|
p->parsing_started = 0;
|
|
|
|
p->normalize = NULL;
|
|
|
|
p->error_indicator = 0;
|
|
|
|
|
|
|
|
p->starting_lineno = 0;
|
|
|
|
p->starting_col_offset = 0;
|
2020-04-28 01:02:07 +08:00
|
|
|
p->flags = flags;
|
2020-05-01 11:27:52 +08:00
|
|
|
p->feature_version = feature_version;
|
2020-05-07 18:37:51 +08:00
|
|
|
p->known_err_token = NULL;
|
2020-05-26 01:38:45 +08:00
|
|
|
p->level = 0;
|
2020-10-27 06:42:04 +08:00
|
|
|
p->call_invalid_rules = 0;
|
2021-05-22 01:34:54 +08:00
|
|
|
p->in_raw_rule = 0;
|
2020-04-23 06:29:27 +08:00
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2020-10-27 06:42:04 +08:00
|
|
|
static void
|
|
|
|
reset_parser_state(Parser *p)
|
|
|
|
{
|
|
|
|
for (int i = 0; i < p->fill; i++) {
|
|
|
|
p->tokens[i]->memo = NULL;
|
|
|
|
}
|
|
|
|
p->mark = 0;
|
|
|
|
p->call_invalid_rules = 1;
|
2021-05-23 06:05:00 +08:00
|
|
|
// Don't try to get extra tokens in interactive mode when trying to
|
|
|
|
// raise specialized errors in the second pass.
|
|
|
|
p->tok->interactive_underflow = IUNDERFLOW_STOP;
|
2020-10-27 06:42:04 +08:00
|
|
|
}
|
|
|
|
|
2021-01-20 07:59:33 +08:00
|
|
|
// Scan the remaining input for tokenizer-level errors (e.g. unclosed
// parentheses) that should replace a generic syntax error already set.
// Returns -1 if a better error was raised, 0 otherwise; any previously set
// exception is preserved unless replaced.
static int
_PyPegen_check_tokenizer_errors(Parser *p) {
    // Tokenize the whole input to see if there are any tokenization
    // errors such as mistmatching parentheses. These will get priority
    // over generic syntax errors only if the line number of the error is
    // before the one that we had for the generic error.

    // We don't want to tokenize to the end for interactive input
    if (p->tok->prompt != NULL) {
        return 0;
    }

    // Stash the current exception so tokenizing cannot clobber it.
    PyObject *type, *value, *traceback;
    PyErr_Fetch(&type, &value, &traceback);

    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
    Py_ssize_t current_err_line = current_token->lineno;

    int ret = 0;

    for (;;) {
        const char *start;
        const char *end;
        switch (PyTokenizer_Get(p->tok, &start, &end)) {
            case ERRORTOKEN:
                // Still inside an open bracket: the unclosed-parenthesis
                // error wins only if it starts before the generic error.
                if (p->tok->level != 0) {
                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
                    if (current_err_line > error_lineno) {
                        raise_unclosed_parentheses_error(p);
                        ret = -1;
                        goto exit;
                    }
                }
                break;
            case ENDMARKER:
                break;
            default:
                continue;
        }
        break;
    }

exit:
    // If a new exception is pending, keep it and drop the stashed one;
    // otherwise restore the original exception state.
    if (PyErr_Occurred()) {
        Py_XDECREF(value);
        Py_XDECREF(type);
        Py_XDECREF(traceback);
    } else {
        PyErr_Restore(type, value, traceback);
    }
    return ret;
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
// Run the grammar from p->start_rule.  On failure, a second pass with the
// invalid_* rules enabled produces a more precise SyntaxError.  Returns
// the parsed AST (arena-owned) or NULL with an exception set.
void *
_PyPegen_run_parser(Parser *p)
{
    void *res = _PyPegen_parse(p);
    if (res == NULL) {
        // Remember where the first pass stopped before rewinding.
        Token *last_token = p->tokens[p->fill - 1];
        reset_parser_state(p);
        _PyPegen_parse(p);
        if (PyErr_Occurred()) {
            // Prioritize tokenizer errors to custom syntax errors raised
            // on the second phase only if the errors come from the parser.
            if (p->tok->done != E_ERROR && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
                _PyPegen_check_tokenizer_errors(p);
            }
            return NULL;
        }
        // The second pass raised nothing: synthesize an appropriate error.
        if (p->fill == 0) {
            RAISE_SYNTAX_ERROR("error at start before reading any input");
        }
        else if (p->tok->done == E_EOF) {
            if (p->tok->level) {
                raise_unclosed_parentheses_error(p);
            } else {
                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
            }
        }
        else {
            if (p->tokens[p->fill-1]->type == INDENT) {
                RAISE_INDENTATION_ERROR("unexpected indent");
            }
            else if (p->tokens[p->fill-1]->type == DEDENT) {
                RAISE_INDENTATION_ERROR("unexpected unindent");
            }
            else {
                // Use the last token we found on the first pass to avoid reporting
                // incorrect locations for generic syntax errors just because we reached
                // further away when trying to find specific syntax errors in the second
                // pass.
                RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
                // _PyPegen_check_tokenizer_errors will override the existing
                // generic SyntaxError we just raised if errors are found.
                _PyPegen_check_tokenizer_errors(p);
            }
        }
        return NULL;
    }

    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
    }

    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
    if (p->start_rule == Py_single_input ||
        p->start_rule == Py_file_input ||
        p->start_rule == Py_eval_input)
    {
        if (!_PyAST_Validate(res)) {
            return NULL;
        }
    }
#endif
    return res;
}
|
|
|
|
|
|
|
|
// Parse Python source from an open FILE*.  Creates a tokenizer for the
// stream, runs the parser, and returns the module AST (arena-owned) or
// NULL with an exception set.  The tokenizer is always freed.
mod_ty
_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
                                      const char *enc, const char *ps1, const char *ps2,
                                      PyCompilerFlags *flags, int *errcode, PyArena *arena)
{
    struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            raise_tokenizer_init_error(filename_ob);
            return NULL;
        }
        return NULL;
    }
    // Interactive mode: prompts supplied, no underlying fp, or stdin.
    if (!tok->fp || ps1 != NULL || ps2 != NULL ||
        PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
        tok->fp_interactive = 1;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = filename_ob;
    Py_INCREF(filename_ob);

    // From here on we need to clean up even if there's an error
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
                                    errcode, arena);
    if (p == NULL) {
        goto error;
    }

    result = _PyPegen_run_parser(p);
    _PyPegen_Parser_Free(p);

error:
    PyTokenizer_Free(tok);
    return result;
}
|
|
|
|
|
|
|
|
// Parse Python source held in a C string.  Chooses the UTF-8 or
// cookie-aware tokenizer based on flags, runs the parser, and returns the
// module AST (arena-owned) or NULL with an exception set.
mod_ty
_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
                                PyCompilerFlags *flags, PyArena *arena)
{
    // file-input mode enables statement-level input handling in the tokenizer.
    int exec_input = start_rule == Py_file_input;

    struct tok_state *tok;
    if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
        tok = PyTokenizer_FromUTF8(str, exec_input);
    } else {
        tok = PyTokenizer_FromString(str, exec_input);
    }
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            raise_tokenizer_init_error(filename_ob);
        }
        return NULL;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = filename_ob;
    Py_INCREF(filename_ob);

    // We need to clear up from here on
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    // AST-only compilation may target an older feature version.
    int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
        flags->cf_feature_version : PY_MINOR_VERSION;
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
                                    NULL, arena);
    if (p == NULL) {
        goto error;
    }

    result = _PyPegen_run_parser(p);
    _PyPegen_Parser_Free(p);

error:
    PyTokenizer_Free(tok);
    return result;
}
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_stmt_seq*
|
2020-04-23 06:29:27 +08:00
|
|
|
_PyPegen_interactive_exit(Parser *p)
|
|
|
|
{
|
|
|
|
if (p->errcode) {
|
|
|
|
*(p->errcode) = E_EOF;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Creates a single-element asdl_seq* that contains a */
|
|
|
|
asdl_seq *
|
|
|
|
_PyPegen_singleton_seq(Parser *p, void *a)
|
|
|
|
{
|
|
|
|
assert(a != NULL);
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!seq) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq_SET_UNTYPED(seq, 0, a);
|
2020-04-23 06:29:27 +08:00
|
|
|
return seq;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Creates a copy of seq and prepends a to it */
|
|
|
|
asdl_seq *
|
|
|
|
_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
|
|
|
|
{
|
|
|
|
assert(a != NULL);
|
|
|
|
if (!seq) {
|
|
|
|
return _PyPegen_singleton_seq(p, a);
|
|
|
|
}
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!new_seq) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq_SET_UNTYPED(new_seq, 0, a);
|
2020-04-23 10:43:08 +08:00
|
|
|
for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
return new_seq;
|
|
|
|
}
|
|
|
|
|
2020-05-01 03:12:19 +08:00
|
|
|
/* Creates a copy of seq and appends a to it */
|
|
|
|
asdl_seq *
|
|
|
|
_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
|
|
|
|
{
|
|
|
|
assert(a != NULL);
|
|
|
|
if (!seq) {
|
|
|
|
return _PyPegen_singleton_seq(p, a);
|
|
|
|
}
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
|
2020-05-01 03:12:19 +08:00
|
|
|
if (!new_seq) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
|
2020-05-01 03:12:19 +08:00
|
|
|
}
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a);
|
2020-05-01 03:12:19 +08:00
|
|
|
return new_seq;
|
|
|
|
}
|
|
|
|
|
2020-04-23 10:43:08 +08:00
|
|
|
static Py_ssize_t
|
2020-04-23 06:29:27 +08:00
|
|
|
_get_flattened_seq_size(asdl_seq *seqs)
|
|
|
|
{
|
2020-04-23 10:43:08 +08:00
|
|
|
Py_ssize_t size = 0;
|
2020-04-23 06:29:27 +08:00
|
|
|
for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
|
2020-04-23 06:29:27 +08:00
|
|
|
size += asdl_seq_LEN(inner_seq);
|
|
|
|
}
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Flattens an asdl_seq* of asdl_seq*s */
asdl_seq *
_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
{
    Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
    // Callers only flatten non-empty results, so the total is positive.
    assert(flattened_seq_size > 0);

    asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
    if (!flattened_seq) {
        return NULL;
    }

    // Copy every element of every inner sequence, in order.
    int flattened_seq_idx = 0;
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
        for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
            asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
        }
    }
    assert(flattened_seq_idx == flattened_seq_size);

    return flattened_seq;
}
|
|
|
|
|
bpo-43914: Highlight invalid ranges in SyntaxErrors (#25525)
To improve the user experience understanding what part of the error messages associated with SyntaxErrors is wrong, we can highlight the whole error range and not only place the caret at the first character. In this way:
>>> foo(x, z for z in range(10), t, w)
File "<stdin>", line 1
foo(x, z for z in range(10), t, w)
^
SyntaxError: Generator expression must be parenthesized
becomes
>>> foo(x, z for z in range(10), t, w)
File "<stdin>", line 1
foo(x, z for z in range(10), t, w)
^^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized
2021-04-23 21:27:05 +08:00
|
|
|
void *
|
|
|
|
_PyPegen_seq_last_item(asdl_seq *seq)
|
|
|
|
{
|
|
|
|
Py_ssize_t len = asdl_seq_LEN(seq);
|
|
|
|
return asdl_seq_GET_UNTYPED(seq, len - 1);
|
|
|
|
}
|
|
|
|
|
2021-06-24 23:09:57 +08:00
|
|
|
void *
|
|
|
|
_PyPegen_seq_first_item(asdl_seq *seq)
|
|
|
|
{
|
|
|
|
return asdl_seq_GET_UNTYPED(seq, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
/* Creates a new name of the form <first_name>.<second_name> */
|
|
|
|
expr_ty
|
|
|
|
_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
|
|
|
|
{
|
|
|
|
assert(first_name != NULL && second_name != NULL);
|
|
|
|
PyObject *first_identifier = first_name->v.Name.id;
|
|
|
|
PyObject *second_identifier = second_name->v.Name.id;
|
|
|
|
|
|
|
|
if (PyUnicode_READY(first_identifier) == -1) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
if (PyUnicode_READY(second_identifier) == -1) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
const char *first_str = PyUnicode_AsUTF8(first_identifier);
|
|
|
|
if (!first_str) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
const char *second_str = PyUnicode_AsUTF8(second_identifier);
|
|
|
|
if (!second_str) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2020-04-24 08:13:33 +08:00
|
|
|
Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot
|
2020-04-23 06:29:27 +08:00
|
|
|
|
|
|
|
PyObject *str = PyBytes_FromStringAndSize(NULL, len);
|
|
|
|
if (!str) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *s = PyBytes_AS_STRING(str);
|
|
|
|
if (!s) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
strcpy(s, first_str);
|
|
|
|
s += strlen(first_str);
|
|
|
|
*s++ = '.';
|
|
|
|
strcpy(s, second_str);
|
|
|
|
s += strlen(second_str);
|
|
|
|
*s = '\0';
|
|
|
|
|
|
|
|
PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
|
|
|
|
Py_DECREF(str);
|
|
|
|
if (!uni) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
PyUnicode_InternInPlace(&uni);
|
2021-03-24 09:23:01 +08:00
|
|
|
if (_PyArena_AddPyObject(p->arena, uni) < 0) {
|
2020-04-23 06:29:27 +08:00
|
|
|
Py_DECREF(uni);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Counts the total number of dots in seq's tokens */
|
|
|
|
int
|
|
|
|
_PyPegen_seq_count_dots(asdl_seq *seq)
|
|
|
|
{
|
|
|
|
int number_of_dots = 0;
|
|
|
|
for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
|
2020-09-17 02:42:00 +08:00
|
|
|
Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
|
2020-04-23 06:29:27 +08:00
|
|
|
switch (current_expr->type) {
|
|
|
|
case ELLIPSIS:
|
|
|
|
number_of_dots += 3;
|
|
|
|
break;
|
|
|
|
case DOT:
|
|
|
|
number_of_dots += 1;
|
|
|
|
break;
|
|
|
|
default:
|
2020-04-23 23:36:06 +08:00
|
|
|
Py_UNREACHABLE();
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return number_of_dots;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Creates an alias with '*' as the identifier name (for `from mod import *`).
   The interned "*" string is owned by the parser arena.  Returns NULL
   (with an exception set) on failure. */
alias_ty
_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
                        int end_col_offset, PyArena *arena) {
    PyObject *str = PyUnicode_InternFromString("*");
    if (!str) {
        return NULL;
    }
    // Transfer ownership of the string to the arena; on failure we still
    // own the reference and must drop it ourselves.
    if (_PyArena_AddPyObject(p->arena, str) < 0) {
        Py_DECREF(str);
        return NULL;
    }
    return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
}
|
|
|
|
|
|
|
|
/* Creates a new asdl_seq* with the identifiers of all the Name nodes in seq.
   seq must be non-empty (asserted) and contain only Name nodes.
   Returns NULL (with an exception set) on allocation failure. */
asdl_identifier_seq *
_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    assert(len > 0);

    asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        expr_ty e = asdl_seq_GET(seq, i);
        // Extract the identifier; identifiers are arena-owned, so no
        // reference counting is needed here.
        asdl_seq_SET(new_seq, i, e->v.Name.id);
    }
    return new_seq;
}
|
|
|
|
|
|
|
|
/* Constructs a CmpopExprPair */
|
|
|
|
CmpopExprPair *
|
|
|
|
_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
|
|
|
|
{
|
|
|
|
assert(expr != NULL);
|
2021-03-24 09:23:01 +08:00
|
|
|
CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!a) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
a->cmpop = cmpop;
|
|
|
|
a->expr = expr;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Extracts all comparison operators from an asdl_seq* of CmpopExprPair*'s.
   seq must be non-empty (asserted).  Returns NULL on allocation failure. */
asdl_int_seq *
_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    assert(len > 0);

    asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
        asdl_seq_SET(new_seq, i, pair->cmpop);
    }
    return new_seq;
}
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
/* Extracts all right-hand operand expressions from an asdl_seq* of
   CmpopExprPair*'s.  seq must be non-empty (asserted).
   Returns NULL on allocation failure. */
asdl_expr_seq *
_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    assert(len > 0);

    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
        asdl_seq_SET(new_seq, i, pair->expr);
    }
    return new_seq;
}
|
|
|
|
|
|
|
|
/* Creates an asdl_seq* where all the elements have been changed to have ctx
   as context.  Returns NULL for an empty input sequence or on allocation
   failure (the two cases are not distinguished by the return value). */
static asdl_expr_seq *
_set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    if (len == 0) {
        return NULL;
    }

    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        expr_ty e = asdl_seq_GET(seq, i);
        // Each element gets a context-adjusted copy (new AST node).
        asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
    }
    return new_seq;
}
|
|
|
|
|
|
|
|
static expr_ty
|
|
|
|
_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
|
|
|
|
{
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static expr_ty
|
|
|
|
_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
|
|
|
|
{
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_Tuple(
|
2020-09-17 02:42:00 +08:00
|
|
|
_set_seq_context(p, e->v.Tuple.elts, ctx),
|
|
|
|
ctx,
|
|
|
|
EXTRA_EXPR(e, e));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static expr_ty
|
|
|
|
_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
|
|
|
|
{
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_List(
|
2020-09-17 02:42:00 +08:00
|
|
|
_set_seq_context(p, e->v.List.elts, ctx),
|
|
|
|
ctx,
|
|
|
|
EXTRA_EXPR(e, e));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static expr_ty
|
|
|
|
_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
|
|
|
|
{
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
|
|
|
|
ctx, EXTRA_EXPR(e, e));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static expr_ty
|
|
|
|
_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
|
|
|
|
{
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
|
|
|
|
ctx, EXTRA_EXPR(e, e));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static expr_ty
|
|
|
|
_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
|
|
|
|
{
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
|
|
|
|
ctx, EXTRA_EXPR(e, e));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context.
   Only node kinds that carry a context (Name, Tuple, List, Subscript,
   Attribute, Starred) are copied; any other kind is returned unchanged.
   May return NULL if an underlying allocation fails. */
expr_ty
_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
{
    assert(expr != NULL);

    expr_ty new = NULL;
    switch (expr->kind) {
        case Name_kind:
            new = _set_name_context(p, expr, ctx);
            break;
        case Tuple_kind:
            new = _set_tuple_context(p, expr, ctx);
            break;
        case List_kind:
            new = _set_list_context(p, expr, ctx);
            break;
        case Subscript_kind:
            new = _set_subscript_context(p, expr, ctx);
            break;
        case Attribute_kind:
            new = _set_attribute_context(p, expr, ctx);
            break;
        case Starred_kind:
            new = _set_starred_context(p, expr, ctx);
            break;
        default:
            // Node kinds without a context field pass through unchanged.
            new = expr;
    }
    return new;
}
|
|
|
|
|
|
|
|
/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
|
|
|
|
KeyValuePair *
|
|
|
|
_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
|
|
|
|
{
|
2021-03-24 09:23:01 +08:00
|
|
|
KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!a) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
a->key = key;
|
|
|
|
a->value = value;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Extracts all keys from an asdl_seq* of KeyValuePair*'s.
   Returns NULL on allocation failure. */
asdl_expr_seq *
_PyPegen_get_keys(Parser *p, asdl_seq *seq)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
        asdl_seq_SET(new_seq, i, pair->key);
    }
    return new_seq;
}
|
|
|
|
|
|
|
|
/* Extracts all values from an asdl_seq* of KeyValuePair*'s.
   Returns NULL on allocation failure. */
asdl_expr_seq *
_PyPegen_get_values(Parser *p, asdl_seq *seq)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
        asdl_seq_SET(new_seq, i, pair->value);
    }
    return new_seq;
}
|
|
|
|
|
2021-04-29 13:58:44 +08:00
|
|
|
/* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */
|
|
|
|
KeyPatternPair *
|
|
|
|
_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
|
|
|
|
{
|
|
|
|
KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
|
|
|
|
if (!a) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
a->key = key;
|
|
|
|
a->pattern = pattern;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Extracts all keys from an asdl_seq* of KeyPatternPair*'s.
   Returns NULL on allocation failure. */
asdl_expr_seq *
_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
        asdl_seq_SET(new_seq, i, pair->key);
    }
    return new_seq;
}
|
|
|
|
|
|
|
|
/* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s.
   Returns NULL on allocation failure. */
asdl_pattern_seq *
_PyPegen_get_patterns(Parser *p, asdl_seq *seq)
{
    Py_ssize_t len = asdl_seq_LEN(seq);
    asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
    if (!new_seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
        asdl_seq_SET(new_seq, i, pair->pattern);
    }
    return new_seq;
}
|
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
/* Constructs a NameDefaultPair (a parameter with its default value),
   allocated in the parser arena.  If tc is non-NULL its type comment is
   attached to the argument node.  Returns NULL on failure.
   NOTE(review): if _PyPegen_add_type_comment_to_arg fails, a->arg is set
   to NULL while a non-NULL pair is still returned — callers presumably
   detect the error via the exception state; confirm against callers. */
NameDefaultPair *
_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
{
    NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
    if (!a) {
        return NULL;
    }
    a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
    a->value = value;
    return a;
}
|
|
|
|
|
|
|
|
/* Constructs a SlashWithDefault */
|
|
|
|
SlashWithDefault *
|
2020-09-17 02:42:00 +08:00
|
|
|
_PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
|
2020-04-23 06:29:27 +08:00
|
|
|
{
|
2021-03-24 09:23:01 +08:00
|
|
|
SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!a) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
a->plain_names = plain_names;
|
|
|
|
a->names_with_defaults = names_with_defaults;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Constructs a StarEtc */
|
|
|
|
StarEtc *
|
|
|
|
_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
|
|
|
|
{
|
2021-03-24 09:23:01 +08:00
|
|
|
StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc));
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!a) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
a->vararg = vararg;
|
|
|
|
a->kwonlyargs = kwonlyargs;
|
|
|
|
a->kwarg = kwarg;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
asdl_seq *
|
|
|
|
_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
|
|
|
|
{
|
2020-04-23 10:43:08 +08:00
|
|
|
Py_ssize_t first_len = asdl_seq_LEN(a);
|
|
|
|
Py_ssize_t second_len = asdl_seq_LEN(b);
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!new_seq) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
int k = 0;
|
|
|
|
for (Py_ssize_t i = 0; i < first_len; i++) {
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
for (Py_ssize_t i = 0; i < second_len; i++) {
|
2020-09-17 02:42:00 +08:00
|
|
|
asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
|
2020-04-23 06:29:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return new_seq;
|
|
|
|
}
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
/* Extracts the arg nodes from a sequence of NameDefaultPair*'s.
   Returns NULL on allocation failure. */
static asdl_arg_seq*
_get_names(Parser *p, asdl_seq *names_with_defaults)
{
    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
    asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
    if (!seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
        asdl_seq_SET(seq, i, pair->arg);
    }
    return seq;
}
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
/* Extracts the default-value expressions from a sequence of
   NameDefaultPair*'s.  Returns NULL on allocation failure. */
static asdl_expr_seq *
_get_defaults(Parser *p, asdl_seq *names_with_defaults)
{
    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
    if (!seq) {
        return NULL;
    }
    for (Py_ssize_t i = 0; i < len; i++) {
        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
        asdl_seq_SET(seq, i, pair->value);
    }
    return seq;
}
|
|
|
|
|
2021-04-09 07:48:53 +08:00
|
|
|
/* Builds the positional-only argument sequence for _PyPegen_make_arguments.
   At most one of slash_without_default / slash_with_default is non-NULL;
   if both are NULL an empty sequence is produced.  Stores the result in
   *posonlyargs and returns 0 on success, -1 on failure. */
static int
_make_posonlyargs(Parser *p,
                  asdl_arg_seq *slash_without_default,
                  SlashWithDefault *slash_with_default,
                  asdl_arg_seq **posonlyargs) {
    if (slash_without_default != NULL) {
        *posonlyargs = slash_without_default;
    }
    else if (slash_with_default != NULL) {
        // Plain names come first, then names that carry defaults.
        asdl_arg_seq *slash_with_default_names =
                _get_names(p, slash_with_default->names_with_defaults);
        if (!slash_with_default_names) {
            return -1;
        }
        *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
                p,
                (asdl_seq*)slash_with_default->plain_names,
                (asdl_seq*)slash_with_default_names);
    }
    else {
        *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
    }
    return *posonlyargs == NULL ? -1 : 0;
}
|
2020-04-23 06:29:27 +08:00
|
|
|
|
2021-04-09 07:48:53 +08:00
|
|
|
/* Builds the regular positional argument sequence for
   _PyPegen_make_arguments by combining plain names with names that have
   defaults (plain names first).  Stores the result in *posargs and
   returns 0 on success, -1 on failure. */
static int
_make_posargs(Parser *p,
              asdl_arg_seq *plain_names,
              asdl_seq *names_with_default,
              asdl_arg_seq **posargs) {
    if (plain_names != NULL && names_with_default != NULL) {
        asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
        if (!names_with_default_names) {
            return -1;
        }
        *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
                p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names);
    }
    else if (plain_names == NULL && names_with_default != NULL) {
        *posargs = _get_names(p, names_with_default);
    }
    else if (plain_names != NULL && names_with_default == NULL) {
        *posargs = plain_names;
    }
    else {
        // Neither kind present: produce an empty sequence.
        *posargs = _Py_asdl_arg_seq_new(0, p->arena);
    }
    return *posargs == NULL ? -1 : 0;
}
|
2020-04-23 06:29:27 +08:00
|
|
|
|
2021-04-09 07:48:53 +08:00
|
|
|
/* Builds the sequence of default values for positional arguments
   (positional-only defaults first, then regular ones) for
   _PyPegen_make_arguments.  Stores the result in *posdefaults and
   returns 0 on success, -1 on failure. */
static int
_make_posdefaults(Parser *p,
                  SlashWithDefault *slash_with_default,
                  asdl_seq *names_with_default,
                  asdl_expr_seq **posdefaults) {
    if (slash_with_default != NULL && names_with_default != NULL) {
        asdl_expr_seq *slash_with_default_values =
                _get_defaults(p, slash_with_default->names_with_defaults);
        if (!slash_with_default_values) {
            return -1;
        }
        asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
        if (!names_with_default_values) {
            return -1;
        }
        *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
                p,
                (asdl_seq*)slash_with_default_values,
                (asdl_seq*)names_with_default_values);
    }
    else if (slash_with_default == NULL && names_with_default != NULL) {
        *posdefaults = _get_defaults(p, names_with_default);
    }
    else if (slash_with_default != NULL && names_with_default == NULL) {
        *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
    }
    else {
        // No defaults at all: produce an empty sequence.
        *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
    }
    return *posdefaults == NULL ? -1 : 0;
}
|
2020-04-23 06:29:27 +08:00
|
|
|
|
2021-04-09 07:48:53 +08:00
|
|
|
/* Builds the keyword-only argument sequence and its matching defaults
   sequence for _PyPegen_make_arguments.  If star_etc carries no
   keyword-only arguments, both outputs are empty sequences.
   Stores the results in *kwonlyargs / *kwdefaults and returns 0 on
   success, -1 on failure. */
static int
_make_kwargs(Parser *p, StarEtc *star_etc,
             asdl_arg_seq **kwonlyargs,
             asdl_expr_seq **kwdefaults) {
    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
        *kwonlyargs = _get_names(p, star_etc->kwonlyargs);
    }
    else {
        *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
    }

    if (*kwonlyargs == NULL) {
        return -1;
    }

    // kwdefaults is positionally parallel to kwonlyargs.
    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
        *kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
    }
    else {
        *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
    }

    if (*kwdefaults == NULL) {
        return -1;
    }

    return 0;
}
|
|
|
|
|
|
|
|
/* Constructs an arguments_ty object out of all the parsed constructs in the
   parameters rule.  Each helper fills one section of the final arguments
   node (positional-only args, positional args, their defaults, and the
   keyword-only args with their defaults); *args and **kwargs are taken
   directly from star_etc.  Returns NULL (with an exception set) on
   failure. */
arguments_ty
_PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
                        SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
                        asdl_seq *names_with_default, StarEtc *star_etc)
{
    asdl_arg_seq *posonlyargs;
    if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
        return NULL;
    }

    asdl_arg_seq *posargs;
    if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
        return NULL;
    }

    asdl_expr_seq *posdefaults;
    if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) {
        return NULL;
    }

    arg_ty vararg = NULL;
    if (star_etc != NULL && star_etc->vararg != NULL) {
        vararg = star_etc->vararg;
    }

    asdl_arg_seq *kwonlyargs;
    asdl_expr_seq *kwdefaults;
    if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
        return NULL;
    }

    arg_ty kwarg = NULL;
    if (star_etc != NULL && star_etc->kwarg != NULL) {
        kwarg = star_etc->kwarg;
    }

    return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
                            kwdefaults, kwarg, posdefaults, p->arena);
}
|
|
|
|
|
2021-04-09 07:48:53 +08:00
|
|
|
|
2020-04-23 06:29:27 +08:00
|
|
|
/* Constructs an empty arguments_ty object, that gets used when a function
 * accepts no arguments.  All sequence fields are fresh empty arena
 * sequences (the AST requires non-NULL sequences even when empty).
 * Returns NULL (with an exception set) on allocation failure. */
arguments_ty
_PyPegen_empty_arguments(Parser *p)
{
    asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
    if (!posonlyargs) {
        return NULL;
    }
    asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena);
    if (!posargs) {
        return NULL;
    }
    asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
    if (!posdefaults) {
        return NULL;
    }
    asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
    if (!kwonlyargs) {
        return NULL;
    }
    asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
    if (!kwdefaults) {
        return NULL;
    }

    return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
                            kwdefaults, NULL, posdefaults, p->arena);
}
|
|
|
|
|
|
|
|
/* Encapsulates the value of an operator_ty into an AugOperator struct */
|
|
|
|
AugOperator *
|
|
|
|
_PyPegen_augoperator(Parser *p, operator_ty kind)
|
|
|
|
{
|
2021-03-24 09:23:01 +08:00
|
|
|
AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator));
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!a) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
a->kind = kind;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Construct a FunctionDef equivalent to function_def, but with decorators.
   function_def must be a FunctionDef or AsyncFunctionDef node; the
   v.FunctionDef accessors are used for both kinds — presumably the two
   union members share the same field layout (NOTE(review): confirm this
   holds in the generated AST structs). */
stmt_ty
_PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
{
    assert(function_def != NULL);
    if (function_def->kind == AsyncFunctionDef_kind) {
        return _PyAST_AsyncFunctionDef(
            function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
            function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
            function_def->v.FunctionDef.type_comment, function_def->lineno,
            function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
            p->arena);
    }

    return _PyAST_FunctionDef(
        function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
        function_def->v.FunctionDef.body, decorators,
        function_def->v.FunctionDef.returns,
        function_def->v.FunctionDef.type_comment, function_def->lineno,
        function_def->col_offset, function_def->end_lineno,
        function_def->end_col_offset, p->arena);
}
|
|
|
|
|
|
|
|
/* Construct a ClassDef equivalent to class_def, but with decorators.
   All other fields (name, bases, keywords, body, locations) are copied
   from the original node.  Returns NULL on allocation failure. */
stmt_ty
_PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
{
    assert(class_def != NULL);
    return _PyAST_ClassDef(
        class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
        class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
        class_def->lineno, class_def->col_offset, class_def->end_lineno,
        class_def->end_col_offset, p->arena);
}
|
|
|
|
|
|
|
|
/* Construct a KeywordOrStarred */
|
|
|
|
KeywordOrStarred *
|
|
|
|
_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
|
|
|
|
{
|
2021-03-24 09:23:01 +08:00
|
|
|
KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!a) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
a->element = element;
|
|
|
|
a->is_keyword = is_keyword;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
|
|
|
|
static int
|
|
|
|
_seq_number_of_starred_exprs(asdl_seq *seq)
|
|
|
|
{
|
|
|
|
int n = 0;
|
|
|
|
for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
|
2020-09-17 02:42:00 +08:00
|
|
|
KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
|
2020-04-23 06:29:27 +08:00
|
|
|
if (!k->is_keyword) {
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s.
   Returns NULL both when there are no starred expressions and on
   allocation failure (callers distinguish via the exception state). */
asdl_expr_seq *
_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
{
    int new_len = _seq_number_of_starred_exprs(kwargs);
    if (new_len == 0) {
        return NULL;
    }
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
    if (!new_seq) {
        return NULL;
    }

    int idx = 0;
    for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
        if (!k->is_keyword) {
            asdl_seq_SET(new_seq, idx++, k->element);
        }
    }
    return new_seq;
}
|
|
|
|
|
|
|
|
/* Return a new asdl_seq* with only the keywords in kwargs.
   Returns NULL both when there are no keyword entries and on allocation
   failure (callers distinguish via the exception state). */
asdl_keyword_seq*
_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
{
    Py_ssize_t len = asdl_seq_LEN(kwargs);
    Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
    if (new_len == 0) {
        return NULL;
    }
    asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
    if (!new_seq) {
        return NULL;
    }

    int idx = 0;
    for (Py_ssize_t i = 0; i < len; i++) {
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
        if (k->is_keyword) {
            asdl_seq_SET(new_seq, idx++, k->element);
        }
    }
    return new_seq;
}
|
|
|
|
|
|
|
|
// Concatenate a run of adjacent string/bytes/f-string tokens into a single
// AST expression: a bytes Constant when all pieces are bytes literals,
// otherwise the node produced by the f-string parser (which also handles
// plain-string concatenation). Mixing bytes and non-bytes pieces is a
// syntax error. Returns NULL on failure.
expr_ty
_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
{
    Py_ssize_t len = asdl_seq_LEN(strings);
    assert(len > 0);

    // First and last tokens give the location span of the combined literal.
    Token *first = asdl_seq_GET_UNTYPED(strings, 0);
    Token *last = asdl_seq_GET_UNTYPED(strings, len - 1);

    int bytesmode = 0;
    PyObject *bytes_str = NULL;  // accumulates concatenated bytes (owned here until arena takes it)

    FstringParser state;
    _PyPegen_FstringParser_Init(&state);

    for (Py_ssize_t i = 0; i < len; i++) {
        Token *t = asdl_seq_GET_UNTYPED(strings, i);

        int this_bytesmode;
        int this_rawmode;
        PyObject *s;        // set for plain str/bytes pieces (new reference)
        const char *fstr;   // set (non-NULL) for f-string pieces instead of s
        Py_ssize_t fstrlen = -1;

        if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
            goto error;
        }

        /* Check that we are not mixing bytes with unicode. */
        if (i != 0 && bytesmode != this_bytesmode) {
            RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
            Py_XDECREF(s);
            goto error;
        }
        bytesmode = this_bytesmode;

        if (fstr != NULL) {
            // f-string piece: exactly one of (s, fstr) is set, and f-strings
            // can never be bytes.
            assert(s == NULL && !bytesmode);

            int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
                                                              this_rawmode, 0, first, t, last);
            if (result < 0) {
                goto error;
            }
        }
        else {
            /* String or byte string. */
            assert(s != NULL && fstr == NULL);
            assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));

            if (bytesmode) {
                if (i == 0) {
                    bytes_str = s;  // take ownership of the first piece
                }
                else {
                    // Steals the reference to s and replaces bytes_str;
                    // bytes_str becomes NULL on failure.
                    PyBytes_ConcatAndDel(&bytes_str, s);
                    if (!bytes_str) {
                        goto error;
                    }
                }
            }
            else {
                /* This is a regular string. Concatenate it. */
                if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
                    goto error;
                }
            }
        }
    }

    if (bytesmode) {
        // Hand ownership of the result to the arena before building the node.
        if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) {
            goto error;
        }
        return _PyAST_Constant(bytes_str, NULL, first->lineno,
                               first->col_offset, last->end_lineno,
                               last->end_col_offset, p->arena);
    }

    return _PyPegen_FstringParser_Finish(p, &state, first, last);

error:
    Py_XDECREF(bytes_str);
    _PyPegen_FstringParser_Dealloc(&state);
    // NOTE(review): decode errors surface here as a pending exception that is
    // re-raised with parser location info; a syntax error raised above
    // presumably already carries its location.
    if (PyErr_Occurred()) {
        raise_decode_error(p);
    }
    return NULL;
}
|
2020-05-01 03:12:19 +08:00
|
|
|
|
2021-04-29 13:58:44 +08:00
|
|
|
expr_ty
|
|
|
|
_PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
|
|
|
|
{
|
|
|
|
if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) {
|
2021-04-30 08:19:28 +08:00
|
|
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return exp;
|
|
|
|
}
|
|
|
|
|
|
|
|
expr_ty
|
|
|
|
_PyPegen_ensure_real(Parser *p, expr_ty exp)
|
|
|
|
{
|
|
|
|
if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) {
|
|
|
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
|
2021-04-29 13:58:44 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return exp;
|
|
|
|
}
|
|
|
|
|
2020-05-01 03:12:19 +08:00
|
|
|
mod_ty
|
2020-09-17 02:42:00 +08:00
|
|
|
_PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
|
|
|
|
asdl_type_ignore_seq *type_ignores = NULL;
|
2020-05-01 03:12:19 +08:00
|
|
|
Py_ssize_t num = p->type_ignore_comments.num_items;
|
|
|
|
if (num > 0) {
|
|
|
|
// Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
|
2020-09-17 02:42:00 +08:00
|
|
|
type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
|
2020-05-01 03:12:19 +08:00
|
|
|
if (type_ignores == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
for (int i = 0; i < num; i++) {
|
|
|
|
PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
|
|
|
|
if (tag == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2021-04-08 03:34:22 +08:00
|
|
|
type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
|
|
|
|
tag, p->arena);
|
2020-05-01 03:12:19 +08:00
|
|
|
if (ti == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
asdl_seq_SET(type_ignores, i, ti);
|
|
|
|
}
|
|
|
|
}
|
2021-04-08 03:34:22 +08:00
|
|
|
return _PyAST_Module(a, type_ignores, p->arena);
|
2020-05-01 03:12:19 +08:00
|
|
|
}
|
2020-05-15 09:04:52 +08:00
|
|
|
|
|
|
|
// Error reporting helpers
|
|
|
|
|
|
|
|
// Recursively search expression `e` for the first sub-expression that is not
// a valid assignment/deletion/for target, returning that node (for error
// reporting) or NULL when everything is a valid target. `targets_type`
// selects the context (e.g. DEL_TARGETS, FOR_TARGETS), since validity
// differs per context: `*x` is a valid assignment target but not a valid
// `del` target.
expr_ty
_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
{
    if (e == NULL) {
        return NULL;
    }

// Recurse into each element of a List/Tuple node's `elts`, returning the
// first invalid child found (if any) from the enclosing function.
#define VISIT_CONTAINER(CONTAINER, TYPE) do { \
        Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
        for (Py_ssize_t i = 0; i < len; i++) {\
            expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
            expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
            if (child != NULL) {\
                return child;\
            }\
        }\
    } while (0)

    // We only need to visit List and Tuple nodes recursively as those
    // are the only ones that can contain valid names in targets when
    // they are parsed as expressions. Any other kind of expression
    // that is a container (like Sets or Dicts) is directly invalid and
    // we don't need to visit it recursively.

    switch (e->kind) {
        case List_kind:
            VISIT_CONTAINER(e, List);
            return NULL;
        case Tuple_kind:
            VISIT_CONTAINER(e, Tuple);
            return NULL;
        case Starred_kind:
            // `del *x` is invalid; otherwise validity depends on what is starred.
            if (targets_type == DEL_TARGETS) {
                return e;
            }
            return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
        case Compare_kind:
            // This is needed, because the `a in b` in `for a in b` gets parsed
            // as a comparison, and so we need to search the left side of the comparison
            // for invalid targets.
            if (targets_type == FOR_TARGETS) {
                cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
                if (cmpop == In) {
                    return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
                }
                return NULL;
            }
            return e;
        case Name_kind:
        case Subscript_kind:
        case Attribute_kind:
            // These three are always valid targets.
            return NULL;
        default:
            // Anything else (calls, literals, operators, ...) is invalid.
            return e;
    }
}
|
|
|
|
|
|
|
|
// Raise the appropriate "positional argument follows ..." syntax error for a
// call whose positional argument appears after keyword material. Always
// returns NULL (via RAISE_SYNTAX_ERROR).
void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
    int has_kwarg_unpacking = 0;
    Py_ssize_t nkeywords = asdl_seq_LEN(e->v.Call.keywords);
    for (Py_ssize_t i = 0; i < nkeywords; i++) {
        keyword_ty kw = asdl_seq_GET(e->v.Call.keywords, i);
        if (kw->arg == NULL) {
            // A keyword with no name is a `**kwargs` unpacking.
            has_kwarg_unpacking = 1;
            break;
        }
    }

    const char *msg = has_kwarg_unpacking
        ? "positional argument follows keyword argument unpacking"
        : "positional argument follows keyword argument";

    return RAISE_SYNTAX_ERROR(msg);
}
|
2020-05-22 08:56:52 +08:00
|
|
|
|
|
|
|
void *
|
|
|
|
_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args)
|
|
|
|
{
|
|
|
|
/* The rule that calls this function is 'args for_if_clauses'.
|
|
|
|
For the input f(L, x for x in y), L and x are in args and
|
|
|
|
the for is parsed as a for_if_clause. We have to check if
|
|
|
|
len <= 1, so that input like dict((a, b) for a, b in x)
|
|
|
|
gets successfully parsed and then we pass the last
|
|
|
|
argument (x in the above example) as the location of the
|
|
|
|
error */
|
|
|
|
Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
|
|
|
|
if (len <= 1) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
bpo-43914: Highlight invalid ranges in SyntaxErrors (#25525)
To improve the user experience understanding what part of the error messages associated with SyntaxErrors is wrong, we can highlight the whole error range and not only place the caret at the first character. In this way:
>>> foo(x, z for z in range(10), t, w)
File "<stdin>", line 1
foo(x, z for z in range(10), t, w)
^
SyntaxError: Generator expression must be parenthesized
becomes
>>> foo(x, z for z in range(10), t, w)
File "<stdin>", line 1
foo(x, z for z in range(10), t, w)
^^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized
2021-04-23 21:27:05 +08:00
|
|
|
return RAISE_SYNTAX_ERROR_STARTING_FROM(
|
2020-05-22 08:56:52 +08:00
|
|
|
(expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
|
|
|
|
"Generator expression must be parenthesized"
|
|
|
|
);
|
|
|
|
}
|
2020-09-03 00:44:19 +08:00
|
|
|
|
|
|
|
|
2020-09-17 02:42:00 +08:00
|
|
|
// Build a Call node (with a dummy callee) combining the positional arguments
// in `a` with the mixed starred/keyword items in `b` (a sequence of
// KeywordOrStarred): starred expressions become trailing positional
// arguments, real keywords become the call's keywords. Returns NULL on
// allocation failure.
expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
                          int lineno, int col_offset, int end_lineno,
                          int end_col_offset, PyArena *arena) {
    Py_ssize_t args_len = asdl_seq_LEN(a);
    Py_ssize_t total_len = args_len;

    // No keyword material at all: reuse `a` directly as the argument list.
    if (b == NULL) {
        return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
                        end_lineno, end_col_offset, arena);
    }

    // Split `b`: starreds are NULL when there are none (or on error),
    // keywords are NULL when every item was starred (or on error).
    asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
    asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);

    if (starreds) {
        total_len += asdl_seq_LEN(starreds);
    }

    asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena);
    if (args == NULL) {
        // BUGFIX: this NULL check was missing; the asdl_seq_SET calls below
        // would dereference a NULL sequence on allocation failure.
        return NULL;
    }

    // Copy the original positionals, then append the starred expressions.
    Py_ssize_t i = 0;
    for (i = 0; i < args_len; i++) {
        asdl_seq_SET(args, i, asdl_seq_GET(a, i));
    }
    for (; i < total_len; i++) {
        asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
    }

    return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
                       col_offset, end_lineno, end_col_offset, arena);
}
|