cpython/Modules/parsermodule.c
Pablo Galindo 9a0000d15d
bpo-36256: Fix bug in parsermodule when parsing if statements (GH-12477)
bpo-36256: Fix bug in parsermodule when parsing if statements

In the parser module, when validating nodes before starting the parsing with to create a ST in "parser_newstobject" there is a problem that appears when two arcs in the same DFA state has transitions with labels with the same type. For example, the DFA for if_stmt has a state with
two labels with the same type: "elif" and "else" (type NAME). The algorithm tries one by one the arcs until the label that starts the arc transition has a label with the same type of the current child label we are trying to accept. In this case, the arc for "elif" comes before the arc for "else"and passes this test (because the current child label is "else" and has the same type as "elif"). This lead to expecting a namedexpr_test (305) instead of a colon (11). The solution is to compare also the string representation (in case there is one) of the labels to see if the transition that we have is the correct one.
2019-03-21 23:33:02 +00:00

1219 lines
40 KiB
C

/* parsermodule.c
*
* Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
* Institute and State University, Blacksburg, Virginia, USA.
* Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
* Amsterdam, The Netherlands. Copying is permitted under the terms
* associated with the main Python distribution, with the additional
* restriction that this additional notice be included and maintained
* on all distributed copies.
*
* This module serves to replace the original parser module written
* by Guido. The functionality is not matched precisely, but the
* original may be implemented on top of this. This is desirable
* since the source of the text to be parsed is now divorced from
* this interface.
*
* Unlike the prior interface, the ability to give a parse tree
* produced by Python code as a tuple to the compiler is enabled by
* this module. See the documentation for more details.
*
* I've added some annotations that help with the lint code-checking
* program, but they're not complete by a long shot. The real errors
* that lint detects are gone, but there are still warnings with
* Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations
* look like "NOTE(...)".
*
* To debug parser errors like
* "parser.ParserError: Expected node type 12, got 333."
* decode symbol numbers using the automatically-generated files
* Lib/symbol.h and Include/token.h.
*/
#include "Python.h" /* general Python API */
#include "Python-ast.h" /* mod_ty */
#undef Yield /* undefine macro conflicting with <winbase.h> */
#include "ast.h"
#include "graminit.h" /* symbols defined in the grammar */
#include "node.h" /* internal parser structure */
#include "errcode.h" /* error codes for PyNode_*() */
#include "token.h" /* token definitions */
/* ISTERMINAL() / ISNONTERMINAL() */
#include "grammar.h"
#include "parsetok.h"
extern grammar _PyParser_Grammar; /* From graminit.c */
#ifdef lint
#include <note.h>
#else
#define NOTE(x)
#endif
/* String constants used to initialize module attributes.
*
*/
static const char parser_copyright_string[] =
"Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
Virginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\n\
Centrum, Amsterdam, The Netherlands.";
PyDoc_STRVAR(parser_doc_string,
"This is an interface to Python's internal parser.");
static const char parser_version_string[] = "0.5";
typedef PyObject* (*SeqMaker) (Py_ssize_t length);
typedef int (*SeqInserter) (PyObject* sequence,
Py_ssize_t index,
PyObject* element);
/* The function below is copyrighted by Stichting Mathematisch Centrum. The
* original copyright statement is included below, and continues to apply
* in full to the function immediately following. All other material is
* original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
* Institute and State University. Changes were made to comply with the
* new naming conventions. Added arguments to provide support for creating
* lists as well as tuples, and optionally including the line numbers.
*/
static PyObject*
node2tuple(node *n, /* node to convert */
SeqMaker mkseq, /* create sequence */
SeqInserter addelem, /* func. to add elem. in seq. */
int lineno, /* include line numbers? */
int col_offset) /* include column offsets? */
{
PyObject *result = NULL, *w;
if (n == NULL) {
Py_RETURN_NONE;
}
if (ISNONTERMINAL(TYPE(n))) {
int i;
result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl));
if (result == NULL)
goto error;
w = PyLong_FromLong(TYPE(n));
if (w == NULL)
goto error;
(void) addelem(result, 0, w);
for (i = 0; i < NCH(n); i++) {
w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset);
if (w == NULL)
goto error;
(void) addelem(result, i+1, w);
}
if (TYPE(n) == encoding_decl) {
w = PyUnicode_FromString(STR(n));
if (w == NULL)
goto error;
(void) addelem(result, i+1, w);
}
}
else if (ISTERMINAL(TYPE(n))) {
result = mkseq(2 + lineno + col_offset);
if (result == NULL)
goto error;
w = PyLong_FromLong(TYPE(n));
if (w == NULL)
goto error;
(void) addelem(result, 0, w);
w = PyUnicode_FromString(STR(n));
if (w == NULL)
goto error;
(void) addelem(result, 1, w);
if (lineno) {
w = PyLong_FromLong(n->n_lineno);
if (w == NULL)
goto error;
(void) addelem(result, 2, w);
}
if (col_offset) {
w = PyLong_FromLong(n->n_col_offset);
if (w == NULL)
goto error;
(void) addelem(result, 2 + lineno, w);
}
}
else {
PyErr_SetString(PyExc_SystemError,
"unrecognized parse tree node type");
return ((PyObject*) NULL);
}
return result;
error:
Py_XDECREF(result);
return NULL;
}
/*
* End of material copyrighted by Stichting Mathematisch Centrum.
*/
/* There are two types of intermediate objects we're interested in:
* 'eval' and 'exec' types. These constants can be used in the st_type
* field of the object type to identify which any given object represents.
* These should probably go in an external header to allow other extensions
* to use them, but then, we really should be using C++ too. ;-)
*/
#define PyST_EXPR 1
#define PyST_SUITE 2
/* These are the internal objects and definitions required to implement the
* ST type. Most of the internal names are more reminiscent of the 'old'
* naming style, but the code uses the new naming convention.
*/
static PyObject*
parser_error = 0;
typedef struct {
PyObject_HEAD /* standard object header */
node* st_node; /* the node* returned by the parser */
int st_type; /* EXPR or SUITE ? */
PyCompilerFlags st_flags; /* Parser and compiler flags */
} PyST_Object;
static void parser_free(PyST_Object *st);
static PyObject* parser_sizeof(PyST_Object *, void *);
static PyObject* parser_richcompare(PyObject *left, PyObject *right, int op);
static PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *);
static PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *);
static PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *);
static PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *);
static PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *);
#define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
static PyMethodDef parser_methods[] = {
{"compile", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE,
PyDoc_STR("Compile this ST object into a code object.")},
{"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE,
PyDoc_STR("Determines if this ST object was created from an expression.")},
{"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE,
PyDoc_STR("Determines if this ST object was created from a suite.")},
{"tolist", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates a list-tree representation of this ST.")},
{"totuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates a tuple-tree representation of this ST.")},
{"__sizeof__", (PyCFunction)parser_sizeof, METH_NOARGS,
PyDoc_STR("Returns size in memory, in bytes.")},
{NULL, NULL, 0, NULL}
};
static
PyTypeObject PyST_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"parser.st", /* tp_name */
(int) sizeof(PyST_Object), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)parser_free, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
/* Functions to access object as input/output buffer */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
/* __doc__ */
"Intermediate representation of a Python parse tree.",
0, /* tp_traverse */
0, /* tp_clear */
parser_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
parser_methods, /* tp_methods */
}; /* PyST_Type */
/* PyST_Type isn't subclassable, so just check ob_type */
#define PyST_Object_Check(v) ((v)->ob_type == &PyST_Type)
static int
parser_compare_nodes(node *left, node *right)
{
int j;
if (TYPE(left) < TYPE(right))
return (-1);
if (TYPE(right) < TYPE(left))
return (1);
if (ISTERMINAL(TYPE(left)))
return (strcmp(STR(left), STR(right)));
if (NCH(left) < NCH(right))
return (-1);
if (NCH(right) < NCH(left))
return (1);
for (j = 0; j < NCH(left); ++j) {
int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
if (v != 0)
return (v);
}
return (0);
}
/* parser_richcompare(PyObject* left, PyObject* right, int op)
*
* Comparison function used by the Python operators ==, !=, <, >, <=, >=
* This really just wraps a call to parser_compare_nodes() with some easy
* checks and protection code.
*
*/
static PyObject *
parser_richcompare(PyObject *left, PyObject *right, int op)
{
int result;
/* neither argument should be NULL, unless something's gone wrong */
if (left == NULL || right == NULL) {
PyErr_BadInternalCall();
return NULL;
}
/* both arguments should be instances of PyST_Object */
if (!PyST_Object_Check(left) || !PyST_Object_Check(right)) {
Py_RETURN_NOTIMPLEMENTED;
}
if (left == right)
/* if arguments are identical, they're equal */
result = 0;
else
result = parser_compare_nodes(((PyST_Object *)left)->st_node,
((PyST_Object *)right)->st_node);
Py_RETURN_RICHCOMPARE(result, 0, op);
}
/* parser_newstobject(node* st)
*
* Allocates a new Python object representing an ST. This is simply the
* 'wrapper' object that holds a node* and allows it to be passed around in
* Python code.
*
*/
static PyObject*
parser_newstobject(node *st, int type)
{
PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type);
if (o != 0) {
o->st_node = st;
o->st_type = type;
o->st_flags.cf_flags = 0;
o->st_flags.cf_feature_version = PY_MINOR_VERSION;
}
else {
PyNode_Free(st);
}
return ((PyObject*)o);
}
/* void parser_free(PyST_Object* st)
*
* This is called by a del statement that reduces the reference count to 0.
*
*/
static void
parser_free(PyST_Object *st)
{
PyNode_Free(st->st_node);
PyObject_Del(st);
}
static PyObject *
parser_sizeof(PyST_Object *st, void *unused)
{
Py_ssize_t res;
res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node);
return PyLong_FromSsize_t(res);
}
/* parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw)
*
* This provides conversion from a node* to a tuple object that can be
* returned to the Python-level caller. The ST object is not modified.
*
*/
static PyObject*
parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw)
{
int line_info = 0;
int col_info = 0;
PyObject *res = 0;
int ok;
static char *keywords[] = {"st", "line_info", "col_info", NULL};
if (self == NULL || PyModule_Check(self)) {
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords,
&PyST_Type, &self, &line_info,
&col_info);
}
else
ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1],
&line_info, &col_info);
if (ok != 0) {
/*
* Convert ST into a tuple representation. Use Guido's function,
* since it's known to work already.
*/
res = node2tuple(((PyST_Object*)self)->st_node,
PyTuple_New, PyTuple_SetItem, line_info, col_info);
}
return (res);
}
/* parser_st2list(PyObject* self, PyObject* args, PyObject* kw)
*
* This provides conversion from a node* to a list object that can be
* returned to the Python-level caller. The ST object is not modified.
*
*/
static PyObject*
parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw)
{
int line_info = 0;
int col_info = 0;
PyObject *res = 0;
int ok;
static char *keywords[] = {"st", "line_info", "col_info", NULL};
if (self == NULL || PyModule_Check(self))
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords,
&PyST_Type, &self, &line_info,
&col_info);
else
ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1],
&line_info, &col_info);
if (ok) {
/*
* Convert ST into a tuple representation. Use Guido's function,
* since it's known to work already.
*/
res = node2tuple(self->st_node,
PyList_New, PyList_SetItem, line_info, col_info);
}
return (res);
}
/* parser_compilest(PyObject* self, PyObject* args)
*
* This function creates code objects from the parse tree represented by
* the passed-in data object. An optional file name is passed in as well.
*
*/
static PyObject*
parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw)
{
PyObject* res = NULL;
PyArena* arena = NULL;
mod_ty mod;
PyObject* filename = NULL;
int ok;
static char *keywords[] = {"st", "filename", NULL};
if (self == NULL || PyModule_Check(self))
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords,
&PyST_Type, &self,
PyUnicode_FSDecoder, &filename);
else
ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1],
PyUnicode_FSDecoder, &filename);
if (!ok)
goto error;
if (filename == NULL) {
filename = PyUnicode_FromString("<syntax-tree>");
if (filename == NULL)
goto error;
}
arena = PyArena_New();
if (!arena)
goto error;
mod = PyAST_FromNodeObject(self->st_node, &self->st_flags,
filename, arena);
if (!mod)
goto error;
res = (PyObject *)PyAST_CompileObject(mod, filename,
&self->st_flags, -1, arena);
error:
Py_XDECREF(filename);
if (arena != NULL)
PyArena_Free(arena);
return res;
}
/* PyObject* parser_isexpr(PyObject* self, PyObject* args)
* PyObject* parser_issuite(PyObject* self, PyObject* args)
*
* Checks the passed-in ST object to determine if it is an expression or
* a statement suite, respectively. The return is a Python truth value.
*
*/
static PyObject*
parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw)
{
PyObject* res = 0;
int ok;
static char *keywords[] = {"st", NULL};
if (self == NULL || PyModule_Check(self))
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
&PyST_Type, &self);
else
ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
if (ok) {
/* Check to see if the ST represents an expression or not. */
res = (self->st_type == PyST_EXPR) ? Py_True : Py_False;
Py_INCREF(res);
}
return (res);
}
static PyObject*
parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw)
{
PyObject* res = 0;
int ok;
static char *keywords[] = {"st", NULL};
if (self == NULL || PyModule_Check(self))
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
&PyST_Type, &self);
else
ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
if (ok) {
/* Check to see if the ST represents an expression or not. */
res = (self->st_type == PyST_EXPR) ? Py_False : Py_True;
Py_INCREF(res);
}
return (res);
}
/* err_string(const char* message)
*
* Sets the error string for an exception of type ParserError.
*
*/
static void
err_string(const char *message)
{
PyErr_SetString(parser_error, message);
}
/* PyObject* parser_do_parse(PyObject* args, int type)
*
* Internal function to actually execute the parse and return the result if
* successful or set an exception if not.
*
*/
static PyObject*
parser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type)
{
char* string = 0;
PyObject* res = 0;
int flags = 0;
perrdetail err;
static char *keywords[] = {"source", NULL};
if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) {
node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL,
&_PyParser_Grammar,
(type == PyST_EXPR)
? eval_input : file_input,
&err, &flags);
if (n) {
res = parser_newstobject(n, type);
if (res) {
((PyST_Object *)res)->st_flags.cf_flags = flags & PyCF_MASK;
((PyST_Object *)res)->st_flags.cf_feature_version = PY_MINOR_VERSION;
}
}
else {
PyParser_SetError(&err);
}
PyParser_ClearError(&err);
}
return (res);
}
/* PyObject* parser_expr(PyObject* self, PyObject* args)
* PyObject* parser_suite(PyObject* self, PyObject* args)
*
* External interfaces to the parser itself. Which is called determines if
* the parser attempts to recognize an expression ('eval' form) or statement
* suite ('exec' form). The real work is done by parser_do_parse() above.
*
*/
static PyObject*
parser_expr(PyST_Object *self, PyObject *args, PyObject *kw)
{
NOTE(ARGUNUSED(self))
return (parser_do_parse(args, kw, "s:expr", PyST_EXPR));
}
static PyObject*
parser_suite(PyST_Object *self, PyObject *args, PyObject *kw)
{
NOTE(ARGUNUSED(self))
return (parser_do_parse(args, kw, "s:suite", PyST_SUITE));
}
/* This is the messy part of the code. Conversion from a tuple to an ST
* object requires that the input tuple be valid without having to rely on
* catching an exception from the compiler. This is done to allow the
* compiler itself to remain fast, since most of its input will come from
* the parser directly, and therefore be known to be syntactically correct.
* This validation is done to ensure that we don't core dump the compile
* phase, returning an exception instead.
*
* Two aspects can be broken out in this code: creating a node tree from
* the tuple passed in, and verifying that it is indeed valid. It may be
* advantageous to expand the number of ST types to include funcdefs and
* lambdadefs to take advantage of the optimizer, recognizing those STs
* here. They are not necessary, and not quite as useful in a raw form.
* For now, let's get expressions and suites working reliably.
*/
static node* build_node_tree(PyObject *tuple);
static int
validate_node(node *tree)
{
int type = TYPE(tree);
int nch = NCH(tree);
dfa *nt_dfa;
state *dfa_state;
int pos, arc;
assert(ISNONTERMINAL(type));
type -= NT_OFFSET;
if (type >= _PyParser_Grammar.g_ndfas) {
PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
return 0;
}
nt_dfa = &_PyParser_Grammar.g_dfa[type];
REQ(tree, nt_dfa->d_type);
/* Run the DFA for this nonterminal. */
dfa_state = nt_dfa->d_state;
for (pos = 0; pos < nch; ++pos) {
node *ch = CHILD(tree, pos);
int ch_type = TYPE(ch);
if (ch_type == suite && TYPE(tree) == funcdef) {
/* This is the opposite hack of what we do in parser.c
(search for func_body_suite), except we don't ever
support type comments here. */
ch_type = func_body_suite;
}
for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
short a_label = dfa_state->s_arc[arc].a_lbl;
assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type)
&& ((ch->n_str == NULL) || (label_str == NULL)
|| (strcmp(ch->n_str, label_str) == 0))
) {
/* The child is acceptable; if non-terminal, validate it recursively. */
if (ISNONTERMINAL(ch_type) && !validate_node(ch))
return 0;
/* Update the state, and move on to the next child. */
dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
goto arc_found;
}
}
/* What would this state have accepted? */
{
short a_label = dfa_state->s_arc->a_lbl;
if (!a_label) /* Wouldn't accept any more children */
goto illegal_num_children;
int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
if (ISNONTERMINAL(next_type)) {
PyErr_Format(parser_error, "Expected node type %d, got %d.",
next_type, ch_type);
}
else if (expected_str != NULL) {
PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",
expected_str);
}
else {
PyErr_Format(parser_error, "Illegal terminal: expected %s.",
_PyParser_TokenNames[next_type]);
}
return 0;
}
arc_found:
continue;
}
/* Are we in a final state? If so, return 1 for successful validation. */
for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
if (!dfa_state->s_arc[arc].a_lbl) {
return 1;
}
}
illegal_num_children:
PyErr_Format(parser_error,
"Illegal number of children for %s node.", nt_dfa->d_name);
return 0;
}
/* PyObject* parser_tuple2st(PyObject* self, PyObject* args)
*
* This is the public function, called from the Python code. It receives a
* single tuple object from the caller, and creates an ST object if the
* tuple can be validated. It does this by checking the first code of the
* tuple, and, if acceptable, builds the internal representation. If this
* step succeeds, the internal representation is validated as fully as
* possible with the recursive validate_node() routine defined above.
*
* This function must be changed if support is to be added for PyST_FRAGMENT
* ST objects.
*
*/
static PyObject*
parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
{
NOTE(ARGUNUSED(self))
PyObject *st = 0;
PyObject *tuple;
node *tree;
static char *keywords[] = {"sequence", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords,
&tuple))
return (0);
if (!PySequence_Check(tuple)) {
PyErr_SetString(PyExc_ValueError,
"sequence2st() requires a single sequence argument");
return (0);
}
/*
* Convert the tree to the internal form before checking it.
*/
tree = build_node_tree(tuple);
if (tree != 0) {
node *validation_root = NULL;
int tree_type = 0;
switch (TYPE(tree)) {
case eval_input:
/* Might be an eval form. */
tree_type = PyST_EXPR;
validation_root = tree;
break;
case encoding_decl:
/* This looks like an encoding_decl so far. */
if (NCH(tree) == 1) {
tree_type = PyST_SUITE;
validation_root = CHILD(tree, 0);
}
else {
err_string("Error Parsing encoding_decl");
}
break;
case file_input:
/* This looks like an exec form so far. */
tree_type = PyST_SUITE;
validation_root = tree;
break;
default:
/* This is a fragment, at best. */
err_string("parse tree does not use a valid start symbol");
}
if (validation_root != NULL && validate_node(validation_root))
st = parser_newstobject(tree, tree_type);
else
PyNode_Free(tree);
}
/* Make sure we raise an exception on all errors. We should never
* get this, but we'd do well to be sure something is done.
*/
if (st == NULL && !PyErr_Occurred())
err_string("unspecified ST error occurred");
return st;
}
/* node* build_node_children()
*
* Iterate across the children of the current non-terminal node and build
* their structures. If successful, return the root of this portion of
* the tree, otherwise, 0. Any required exception will be specified already,
* and no memory will have been deallocated.
*
*/
static node*
build_node_children(PyObject *tuple, node *root, int *line_num)
{
Py_ssize_t len = PyObject_Size(tuple);
Py_ssize_t i;
int err;
if (len < 0) {
return NULL;
}
for (i = 1; i < len; ++i) {
/* elem must always be a sequence, however simple */
PyObject* elem = PySequence_GetItem(tuple, i);
int ok = elem != NULL;
int type = 0;
char *strn = 0;
if (ok)
ok = PySequence_Check(elem);
if (ok) {
PyObject *temp = PySequence_GetItem(elem, 0);
if (temp == NULL)
ok = 0;
else {
ok = PyLong_Check(temp);
if (ok) {
type = _PyLong_AsInt(temp);
if (type == -1 && PyErr_Occurred()) {
Py_DECREF(temp);
Py_DECREF(elem);
return NULL;
}
}
Py_DECREF(temp);
}
}
if (!ok) {
PyObject *err = Py_BuildValue("Os", elem,
"Illegal node construct.");
PyErr_SetObject(parser_error, err);
Py_XDECREF(err);
Py_XDECREF(elem);
return NULL;
}
if (ISTERMINAL(type)) {
Py_ssize_t len = PyObject_Size(elem);
PyObject *temp;
const char *temp_str;
if ((len != 2) && (len != 3)) {
err_string("terminal nodes must have 2 or 3 entries");
Py_DECREF(elem);
return NULL;
}
temp = PySequence_GetItem(elem, 1);
if (temp == NULL) {
Py_DECREF(elem);
return NULL;
}
if (!PyUnicode_Check(temp)) {
PyErr_Format(parser_error,
"second item in terminal node must be a string,"
" found %s",
Py_TYPE(temp)->tp_name);
Py_DECREF(temp);
Py_DECREF(elem);
return NULL;
}
if (len == 3) {
PyObject *o = PySequence_GetItem(elem, 2);
if (o == NULL) {
Py_DECREF(temp);
Py_DECREF(elem);
return NULL;
}
if (PyLong_Check(o)) {
int num = _PyLong_AsInt(o);
if (num == -1 && PyErr_Occurred()) {
Py_DECREF(o);
Py_DECREF(temp);
Py_DECREF(elem);
return NULL;
}
*line_num = num;
}
else {
PyErr_Format(parser_error,
"third item in terminal node must be an"
" integer, found %s",
Py_TYPE(temp)->tp_name);
Py_DECREF(o);
Py_DECREF(temp);
Py_DECREF(elem);
return NULL;
}
Py_DECREF(o);
}
temp_str = PyUnicode_AsUTF8AndSize(temp, &len);
if (temp_str == NULL) {
Py_DECREF(temp);
Py_DECREF(elem);
return NULL;
}
strn = (char *)PyObject_MALLOC(len + 1);
if (strn == NULL) {
Py_DECREF(temp);
Py_DECREF(elem);
PyErr_NoMemory();
return NULL;
}
(void) memcpy(strn, temp_str, len + 1);
Py_DECREF(temp);
}
else if (!ISNONTERMINAL(type)) {
/*
* It has to be one or the other; this is an error.
* Raise an exception.
*/
PyObject *err = Py_BuildValue("Os", elem, "unknown node type.");
PyErr_SetObject(parser_error, err);
Py_XDECREF(err);
Py_DECREF(elem);
return NULL;
}
err = PyNode_AddChild(root, type, strn, *line_num, 0, *line_num, 0);
if (err == E_NOMEM) {
Py_DECREF(elem);
PyObject_FREE(strn);
PyErr_NoMemory();
return NULL;
}
if (err == E_OVERFLOW) {
Py_DECREF(elem);
PyObject_FREE(strn);
PyErr_SetString(PyExc_ValueError,
"unsupported number of child nodes");
return NULL;
}
if (ISNONTERMINAL(type)) {
node* new_child = CHILD(root, i - 1);
if (new_child != build_node_children(elem, new_child, line_num)) {
Py_DECREF(elem);
return NULL;
}
}
else if (type == NEWLINE) { /* It's true: we increment the */
++(*line_num); /* line number *after* the newline! */
}
Py_DECREF(elem);
}
return root;
}
static node*
build_node_tree(PyObject *tuple)
{
node* res = 0;
PyObject *temp = PySequence_GetItem(tuple, 0);
long num = -1;
if (temp != NULL)
num = PyLong_AsLong(temp);
Py_XDECREF(temp);
if (ISTERMINAL(num)) {
/*
* The tuple is simple, but it doesn't start with a start symbol.
* Raise an exception now and be done with it.
*/
tuple = Py_BuildValue("Os", tuple,
"Illegal syntax-tree; cannot start with terminal symbol.");
PyErr_SetObject(parser_error, tuple);
Py_XDECREF(tuple);
}
else if (ISNONTERMINAL(num)) {
/*
* Not efficient, but that can be handled later.
*/
int line_num = 0;
PyObject *encoding = NULL;
if (num == encoding_decl) {
encoding = PySequence_GetItem(tuple, 2);
if (encoding == NULL) {
PyErr_SetString(parser_error, "missed encoding");
return NULL;
}
if (!PyUnicode_Check(encoding)) {
PyErr_Format(parser_error,
"encoding must be a string, found %.200s",
Py_TYPE(encoding)->tp_name);
Py_DECREF(encoding);
return NULL;
}
/* tuple isn't borrowed anymore here, need to DECREF */
tuple = PySequence_GetSlice(tuple, 0, 2);
if (tuple == NULL) {
Py_DECREF(encoding);
return NULL;
}
}
res = PyNode_New(num);
if (res != NULL) {
if (res != build_node_children(tuple, res, &line_num)) {
PyNode_Free(res);
res = NULL;
}
if (res && encoding) {
Py_ssize_t len;
const char *temp;
temp = PyUnicode_AsUTF8AndSize(encoding, &len);
if (temp == NULL) {
PyNode_Free(res);
Py_DECREF(encoding);
Py_DECREF(tuple);
return NULL;
}
res->n_str = (char *)PyObject_MALLOC(len + 1);
if (res->n_str == NULL) {
PyNode_Free(res);
Py_DECREF(encoding);
Py_DECREF(tuple);
PyErr_NoMemory();
return NULL;
}
(void) memcpy(res->n_str, temp, len + 1);
}
}
if (encoding != NULL) {
Py_DECREF(encoding);
Py_DECREF(tuple);
}
}
else {
/* The tuple is illegal -- if the number is neither TERMINAL nor
* NONTERMINAL, we can't use it. Not sure the implementation
* allows this condition, but the API doesn't preclude it.
*/
PyObject *err = Py_BuildValue("Os", tuple,
"Illegal component tuple.");
PyErr_SetObject(parser_error, err);
Py_XDECREF(err);
}
return (res);
}
static PyObject*
pickle_constructor = NULL;
static PyObject*
parser__pickler(PyObject *self, PyObject *args)
{
NOTE(ARGUNUSED(self))
PyObject *result = NULL;
PyObject *st = NULL;
PyObject *empty_dict = NULL;
if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) {
PyObject *newargs;
PyObject *tuple;
if ((empty_dict = PyDict_New()) == NULL)
goto finally;
if ((newargs = Py_BuildValue("Oi", st, 1)) == NULL)
goto finally;
tuple = parser_st2tuple((PyST_Object*)NULL, newargs, empty_dict);
if (tuple != NULL) {
result = Py_BuildValue("O(O)", pickle_constructor, tuple);
Py_DECREF(tuple);
}
Py_DECREF(newargs);
}
finally:
Py_XDECREF(empty_dict);
return (result);
}
/* Functions exported by this module. Most of this should probably
* be converted into an ST object with methods, but that is better
* done directly in Python, allowing subclasses to be created directly.
* We'd really have to write a wrapper around it all anyway to allow
* inheritance.
*/
static PyMethodDef parser_functions[] = {
{"compilest", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE,
PyDoc_STR("Compiles an ST object into a code object.")},
{"expr", (PyCFunction)(void(*)(void))parser_expr, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates an ST object from an expression.")},
{"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE,
PyDoc_STR("Determines if an ST object was created from an expression.")},
{"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE,
PyDoc_STR("Determines if an ST object was created from a suite.")},
{"suite", (PyCFunction)(void(*)(void))parser_suite, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates an ST object from a suite.")},
{"sequence2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates an ST object from a tree representation.")},
{"st2tuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates a tuple-tree representation of an ST.")},
{"st2list", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates a list-tree representation of an ST.")},
{"tuple2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE,
PyDoc_STR("Creates an ST object from a tree representation.")},
/* private stuff: support pickle module */
{"_pickler", (PyCFunction)parser__pickler, METH_VARARGS,
PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef parsermodule = {
PyModuleDef_HEAD_INIT,
"parser",
NULL,
-1,
parser_functions,
NULL,
NULL,
NULL,
NULL
};
PyMODINIT_FUNC PyInit_parser(void); /* supply a prototype */
PyMODINIT_FUNC
PyInit_parser(void)
{
PyObject *module, *copyreg;
if (PyType_Ready(&PyST_Type) < 0)
return NULL;
module = PyModule_Create(&parsermodule);
if (module == NULL)
return NULL;
if (parser_error == 0)
parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
if (parser_error == 0)
return NULL;
/* CAUTION: The code next used to skip bumping the refcount on
* parser_error. That's a disaster if PyInit_parser() gets called more
* than once. By incref'ing, we ensure that each module dict that
* gets created owns its reference to the shared parser_error object,
* and the file static parser_error vrbl owns a reference too.
*/
Py_INCREF(parser_error);
if (PyModule_AddObject(module, "ParserError", parser_error) != 0)
return NULL;
Py_INCREF(&PyST_Type);
PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type);
PyModule_AddStringConstant(module, "__copyright__",
parser_copyright_string);
PyModule_AddStringConstant(module, "__doc__",
parser_doc_string);
PyModule_AddStringConstant(module, "__version__",
parser_version_string);
/* Register to support pickling.
* If this fails, the import of this module will fail because an
* exception will be raised here; should we clear the exception?
*/
copyreg = PyImport_ImportModuleNoBlock("copyreg");
if (copyreg != NULL) {
PyObject *func, *pickler;
_Py_IDENTIFIER(pickle);
_Py_IDENTIFIER(sequence2st);
_Py_IDENTIFIER(_pickler);
func = _PyObject_GetAttrId(copyreg, &PyId_pickle);
pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st);
pickler = _PyObject_GetAttrId(module, &PyId__pickler);
Py_XINCREF(pickle_constructor);
if ((func != NULL) && (pickle_constructor != NULL)
&& (pickler != NULL)) {
PyObject *res;
res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler,
pickle_constructor, NULL);
Py_XDECREF(res);
}
Py_XDECREF(func);
Py_XDECREF(pickle_constructor);
Py_XDECREF(pickler);
Py_DECREF(copyreg);
}
return module;
}