mirror of
https://github.com/python/cpython.git
synced 2024-11-25 19:03:49 +08:00
e5362eaa75
Converting with line_info=False and col_info=True crashed before.
1198 lines
39 KiB
C
1198 lines
39 KiB
C
/* parsermodule.c
|
|
*
|
|
* Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
|
|
* Institute and State University, Blacksburg, Virginia, USA.
|
|
* Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
|
|
* Amsterdam, The Netherlands. Copying is permitted under the terms
|
|
* associated with the main Python distribution, with the additional
|
|
* restriction that this additional notice be included and maintained
|
|
* on all distributed copies.
|
|
*
|
|
* This module serves to replace the original parser module written
|
|
* by Guido. The functionality is not matched precisely, but the
|
|
* original may be implemented on top of this. This is desirable
|
|
* since the source of the text to be parsed is now divorced from
|
|
* this interface.
|
|
*
|
|
* Unlike the prior interface, the ability to give a parse tree
|
|
* produced by Python code as a tuple to the compiler is enabled by
|
|
* this module. See the documentation for more details.
|
|
*
|
|
* I've added some annotations that help with the lint code-checking
|
|
* program, but they're not complete by a long shot. The real errors
|
|
* that lint detects are gone, but there are still warnings with
|
|
* Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations
|
|
* look like "NOTE(...)".
|
|
*
|
|
* To debug parser errors like
|
|
* "parser.ParserError: Expected node type 12, got 333."
|
|
* decode symbol numbers using the automatically-generated files
* Lib/symbol.py and Include/token.h.
|
|
*/
|
|
|
|
#include "Python.h" /* general Python API */
|
|
#include "Python-ast.h" /* mod_ty */
|
|
#include "graminit.h" /* symbols defined in the grammar */
|
|
#include "node.h" /* internal parser structure */
|
|
#include "errcode.h" /* error codes for PyNode_*() */
|
|
#include "token.h" /* token definitions */
|
|
#include "grammar.h"
|
|
#include "parsetok.h"
|
|
/* ISTERMINAL() / ISNONTERMINAL() */
|
|
#undef Yield
|
|
#include "ast.h"
|
|
|
|
extern grammar _PyParser_Grammar; /* From graminit.c */
|
|
|
|
#ifdef lint
|
|
#include <note.h>
|
|
#else
|
|
#define NOTE(x)
|
|
#endif
|
|
|
|
/* String constants used to initialize module attributes.
|
|
*
|
|
*/
|
|
static const char parser_copyright_string[] =
|
|
"Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
|
|
University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
|
|
Virginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\n\
|
|
Centrum, Amsterdam, The Netherlands.";
|
|
|
|
|
|
PyDoc_STRVAR(parser_doc_string,
|
|
"This is an interface to Python's internal parser.");
|
|
|
|
static const char parser_version_string[] = "0.5";
|
|
|
|
|
|
/* Callback that allocates a new sequence object with room for `length`
 * elements.  Used by node2tuple() to build either tuples or lists.
 */
typedef PyObject *(*SeqMaker)(Py_ssize_t length);

/* Callback that stores `element` at position `index` of `sequence`.
 * Used by node2tuple() together with the matching SeqMaker.
 */
typedef int (*SeqInserter)(PyObject *sequence,
                           Py_ssize_t index,
                           PyObject *element);
|
|
|
|
/* The function below is copyrighted by Stichting Mathematisch Centrum. The
|
|
* original copyright statement is included below, and continues to apply
|
|
* in full to the function immediately following. All other material is
|
|
* original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
|
|
* Institute and State University. Changes were made to comply with the
|
|
* new naming conventions. Added arguments to provide support for creating
|
|
* lists as well as tuples, and optionally including the line numbers.
|
|
*/
|
|
|
|
|
|
static PyObject*
|
|
node2tuple(node *n, /* node to convert */
|
|
SeqMaker mkseq, /* create sequence */
|
|
SeqInserter addelem, /* func. to add elem. in seq. */
|
|
int lineno, /* include line numbers? */
|
|
int col_offset) /* include column offsets? */
|
|
{
|
|
PyObject *result = NULL, *w;
|
|
|
|
if (n == NULL) {
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
if (ISNONTERMINAL(TYPE(n))) {
|
|
int i;
|
|
|
|
result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl));
|
|
if (result == NULL)
|
|
goto error;
|
|
|
|
w = PyLong_FromLong(TYPE(n));
|
|
if (w == NULL)
|
|
goto error;
|
|
(void) addelem(result, 0, w);
|
|
|
|
for (i = 0; i < NCH(n); i++) {
|
|
w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset);
|
|
if (w == NULL)
|
|
goto error;
|
|
(void) addelem(result, i+1, w);
|
|
}
|
|
|
|
if (TYPE(n) == encoding_decl) {
|
|
w = PyUnicode_FromString(STR(n));
|
|
if (w == NULL)
|
|
goto error;
|
|
(void) addelem(result, i+1, w);
|
|
}
|
|
}
|
|
else if (ISTERMINAL(TYPE(n))) {
|
|
result = mkseq(2 + lineno + col_offset);
|
|
if (result == NULL)
|
|
goto error;
|
|
|
|
w = PyLong_FromLong(TYPE(n));
|
|
if (w == NULL)
|
|
goto error;
|
|
(void) addelem(result, 0, w);
|
|
|
|
w = PyUnicode_FromString(STR(n));
|
|
if (w == NULL)
|
|
goto error;
|
|
(void) addelem(result, 1, w);
|
|
|
|
if (lineno) {
|
|
w = PyLong_FromLong(n->n_lineno);
|
|
if (w == NULL)
|
|
goto error;
|
|
(void) addelem(result, 2, w);
|
|
}
|
|
|
|
if (col_offset) {
|
|
w = PyLong_FromLong(n->n_col_offset);
|
|
if (w == NULL)
|
|
goto error;
|
|
(void) addelem(result, 2 + lineno, w);
|
|
}
|
|
}
|
|
else {
|
|
PyErr_SetString(PyExc_SystemError,
|
|
"unrecognized parse tree node type");
|
|
return ((PyObject*) NULL);
|
|
}
|
|
return result;
|
|
|
|
error:
|
|
Py_XDECREF(result);
|
|
return NULL;
|
|
}
|
|
/*
|
|
* End of material copyrighted by Stichting Mathematisch Centrum.
|
|
*/
|
|
|
|
|
|
|
|
/* There are two types of intermediate objects we're interested in:
|
|
* 'eval' and 'exec' types. These constants can be used in the st_type
|
|
* field of the object type to identify which any given object represents.
|
|
* These should probably go in an external header to allow other extensions
|
|
* to use them, but then, we really should be using C++ too. ;-)
|
|
*/
|
|
|
|
#define PyST_EXPR 1
|
|
#define PyST_SUITE 2
|
|
|
|
|
|
/* These are the internal objects and definitions required to implement the
|
|
* ST type. Most of the internal names are more reminiscent of the 'old'
|
|
* naming style, but the code uses the new naming convention.
|
|
*/
|
|
|
|
static PyObject*
|
|
parser_error = 0;
|
|
|
|
|
|
typedef struct {
|
|
PyObject_HEAD /* standard object header */
|
|
node* st_node; /* the node* returned by the parser */
|
|
int st_type; /* EXPR or SUITE ? */
|
|
PyCompilerFlags st_flags; /* Parser and compiler flags */
|
|
} PyST_Object;
|
|
|
|
|
|
static void parser_free(PyST_Object *st);
|
|
static PyObject* parser_sizeof(PyST_Object *, void *);
|
|
static PyObject* parser_richcompare(PyObject *left, PyObject *right, int op);
|
|
static PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *);
|
|
static PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *);
|
|
static PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *);
|
|
static PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *);
|
|
static PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *);
|
|
|
|
#define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
|
|
|
|
static PyMethodDef parser_methods[] = {
|
|
{"compile", (PyCFunction)parser_compilest, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Compile this ST object into a code object.")},
|
|
{"isexpr", (PyCFunction)parser_isexpr, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Determines if this ST object was created from an expression.")},
|
|
{"issuite", (PyCFunction)parser_issuite, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Determines if this ST object was created from a suite.")},
|
|
{"tolist", (PyCFunction)parser_st2list, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates a list-tree representation of this ST.")},
|
|
{"totuple", (PyCFunction)parser_st2tuple, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates a tuple-tree representation of this ST.")},
|
|
{"__sizeof__", (PyCFunction)parser_sizeof, METH_NOARGS,
|
|
PyDoc_STR("Returns size in memory, in bytes.")},
|
|
{NULL, NULL, 0, NULL}
|
|
};
|
|
|
|
static
|
|
PyTypeObject PyST_Type = {
|
|
PyVarObject_HEAD_INIT(NULL, 0)
|
|
"parser.st", /* tp_name */
|
|
(int) sizeof(PyST_Object), /* tp_basicsize */
|
|
0, /* tp_itemsize */
|
|
(destructor)parser_free, /* tp_dealloc */
|
|
0, /* tp_print */
|
|
0, /* tp_getattr */
|
|
0, /* tp_setattr */
|
|
0, /* tp_reserved */
|
|
0, /* tp_repr */
|
|
0, /* tp_as_number */
|
|
0, /* tp_as_sequence */
|
|
0, /* tp_as_mapping */
|
|
0, /* tp_hash */
|
|
0, /* tp_call */
|
|
0, /* tp_str */
|
|
0, /* tp_getattro */
|
|
0, /* tp_setattro */
|
|
|
|
/* Functions to access object as input/output buffer */
|
|
0, /* tp_as_buffer */
|
|
|
|
Py_TPFLAGS_DEFAULT, /* tp_flags */
|
|
|
|
/* __doc__ */
|
|
"Intermediate representation of a Python parse tree.",
|
|
0, /* tp_traverse */
|
|
0, /* tp_clear */
|
|
parser_richcompare, /* tp_richcompare */
|
|
0, /* tp_weaklistoffset */
|
|
0, /* tp_iter */
|
|
0, /* tp_iternext */
|
|
parser_methods, /* tp_methods */
|
|
}; /* PyST_Type */
|
|
|
|
|
|
/* PyST_Type isn't subclassable, so just check ob_type */
|
|
#define PyST_Object_Check(v) ((v)->ob_type == &PyST_Type)
|
|
|
|
static int
|
|
parser_compare_nodes(node *left, node *right)
|
|
{
|
|
int j;
|
|
|
|
if (TYPE(left) < TYPE(right))
|
|
return (-1);
|
|
|
|
if (TYPE(right) < TYPE(left))
|
|
return (1);
|
|
|
|
if (ISTERMINAL(TYPE(left)))
|
|
return (strcmp(STR(left), STR(right)));
|
|
|
|
if (NCH(left) < NCH(right))
|
|
return (-1);
|
|
|
|
if (NCH(right) < NCH(left))
|
|
return (1);
|
|
|
|
for (j = 0; j < NCH(left); ++j) {
|
|
int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
|
|
|
|
if (v != 0)
|
|
return (v);
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
/* parser_richcompare(PyObject* left, PyObject* right, int op)
|
|
*
|
|
* Comparison function used by the Python operators ==, !=, <, >, <=, >=
|
|
* This really just wraps a call to parser_compare_nodes() with some easy
|
|
* checks and protection code.
|
|
*
|
|
*/
|
|
|
|
/* tp_richcompare slot for ST objects.
 *
 * Returns NotImplemented unless both operands are exactly ST objects;
 * otherwise maps the three-way result of parser_compare_nodes() onto
 * the requested operator `op`.
 */
static PyObject *
parser_richcompare(PyObject *left, PyObject *right, int op)
{
    int cmp = 0;    /* identical objects compare equal without a walk */

    /* neither argument should be NULL, unless something's gone wrong */
    if (left == NULL || right == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    /* both arguments should be instances of PyST_Object */
    if (!(PyST_Object_Check(left) && PyST_Object_Check(right))) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (left != right) {
        cmp = parser_compare_nodes(((PyST_Object *)left)->st_node,
                                   ((PyST_Object *)right)->st_node);
    }

    Py_RETURN_RICHCOMPARE(cmp, 0, op);
}
|
|
|
|
/* parser_newstobject(node* st)
|
|
*
|
|
* Allocates a new Python object representing an ST. This is simply the
|
|
* 'wrapper' object that holds a node* and allows it to be passed around in
|
|
* Python code.
|
|
*
|
|
*/
|
|
static PyObject*
|
|
parser_newstobject(node *st, int type)
|
|
{
|
|
PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type);
|
|
|
|
if (o != 0) {
|
|
o->st_node = st;
|
|
o->st_type = type;
|
|
o->st_flags.cf_flags = 0;
|
|
}
|
|
else {
|
|
PyNode_Free(st);
|
|
}
|
|
return ((PyObject*)o);
|
|
}
|
|
|
|
|
|
/* void parser_free(PyST_Object* st)
|
|
*
|
|
* This is called by a del statement that reduces the reference count to 0.
|
|
*
|
|
*/
|
|
static void
|
|
parser_free(PyST_Object *st)
|
|
{
|
|
PyNode_Free(st->st_node);
|
|
PyObject_Del(st);
|
|
}
|
|
|
|
static PyObject *
|
|
parser_sizeof(PyST_Object *st, void *unused)
|
|
{
|
|
Py_ssize_t res;
|
|
|
|
res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node);
|
|
return PyLong_FromSsize_t(res);
|
|
}
|
|
|
|
|
|
/* parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw)
|
|
*
|
|
* This provides conversion from a node* to a tuple object that can be
|
|
* returned to the Python-level caller. The ST object is not modified.
|
|
*
|
|
*/
|
|
static PyObject*
|
|
parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
int line_info = 0;
|
|
int col_info = 0;
|
|
PyObject *res = 0;
|
|
int ok;
|
|
|
|
static char *keywords[] = {"st", "line_info", "col_info", NULL};
|
|
|
|
if (self == NULL || PyModule_Check(self)) {
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords,
|
|
&PyST_Type, &self, &line_info,
|
|
&col_info);
|
|
}
|
|
else
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1],
|
|
&line_info, &col_info);
|
|
if (ok != 0) {
|
|
/*
|
|
* Convert ST into a tuple representation. Use Guido's function,
|
|
* since it's known to work already.
|
|
*/
|
|
res = node2tuple(((PyST_Object*)self)->st_node,
|
|
PyTuple_New, PyTuple_SetItem, line_info, col_info);
|
|
}
|
|
return (res);
|
|
}
|
|
|
|
|
|
/* parser_st2list(PyObject* self, PyObject* args, PyObject* kw)
|
|
*
|
|
* This provides conversion from a node* to a list object that can be
|
|
* returned to the Python-level caller. The ST object is not modified.
|
|
*
|
|
*/
|
|
static PyObject*
|
|
parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
int line_info = 0;
|
|
int col_info = 0;
|
|
PyObject *res = 0;
|
|
int ok;
|
|
|
|
static char *keywords[] = {"st", "line_info", "col_info", NULL};
|
|
|
|
if (self == NULL || PyModule_Check(self))
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords,
|
|
&PyST_Type, &self, &line_info,
|
|
&col_info);
|
|
else
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1],
|
|
&line_info, &col_info);
|
|
if (ok) {
|
|
/*
|
|
* Convert ST into a tuple representation. Use Guido's function,
|
|
* since it's known to work already.
|
|
*/
|
|
res = node2tuple(self->st_node,
|
|
PyList_New, PyList_SetItem, line_info, col_info);
|
|
}
|
|
return (res);
|
|
}
|
|
|
|
|
|
/* parser_compilest(PyObject* self, PyObject* args)
|
|
*
|
|
* This function creates code objects from the parse tree represented by
|
|
* the passed-in data object. An optional file name is passed in as well.
|
|
*
|
|
*/
|
|
static PyObject*
|
|
parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
PyObject* res = NULL;
|
|
PyArena* arena = NULL;
|
|
mod_ty mod;
|
|
PyObject* filename = NULL;
|
|
int ok;
|
|
|
|
static char *keywords[] = {"st", "filename", NULL};
|
|
|
|
if (self == NULL || PyModule_Check(self))
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords,
|
|
&PyST_Type, &self,
|
|
PyUnicode_FSDecoder, &filename);
|
|
else
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1],
|
|
PyUnicode_FSDecoder, &filename);
|
|
if (!ok)
|
|
goto error;
|
|
|
|
if (filename == NULL) {
|
|
filename = PyUnicode_FromString("<syntax-tree>");
|
|
if (filename == NULL)
|
|
goto error;
|
|
}
|
|
|
|
arena = PyArena_New();
|
|
if (!arena)
|
|
goto error;
|
|
|
|
mod = PyAST_FromNodeObject(self->st_node, &self->st_flags,
|
|
filename, arena);
|
|
if (!mod)
|
|
goto error;
|
|
|
|
res = (PyObject *)PyAST_CompileObject(mod, filename,
|
|
&self->st_flags, -1, arena);
|
|
error:
|
|
Py_XDECREF(filename);
|
|
if (arena != NULL)
|
|
PyArena_Free(arena);
|
|
return res;
|
|
}
|
|
|
|
|
|
/* PyObject* parser_isexpr(PyObject* self, PyObject* args)
|
|
* PyObject* parser_issuite(PyObject* self, PyObject* args)
|
|
*
|
|
* Checks the passed-in ST object to determine if it is an expression or
|
|
* a statement suite, respectively. The return is a Python truth value.
|
|
*
|
|
*/
|
|
static PyObject*
|
|
parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
PyObject* res = 0;
|
|
int ok;
|
|
|
|
static char *keywords[] = {"st", NULL};
|
|
|
|
if (self == NULL || PyModule_Check(self))
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
|
|
&PyST_Type, &self);
|
|
else
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
|
|
|
|
if (ok) {
|
|
/* Check to see if the ST represents an expression or not. */
|
|
res = (self->st_type == PyST_EXPR) ? Py_True : Py_False;
|
|
Py_INCREF(res);
|
|
}
|
|
return (res);
|
|
}
|
|
|
|
|
|
static PyObject*
|
|
parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
PyObject* res = 0;
|
|
int ok;
|
|
|
|
static char *keywords[] = {"st", NULL};
|
|
|
|
if (self == NULL || PyModule_Check(self))
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
|
|
&PyST_Type, &self);
|
|
else
|
|
ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
|
|
|
|
if (ok) {
|
|
/* Check to see if the ST represents an expression or not. */
|
|
res = (self->st_type == PyST_EXPR) ? Py_False : Py_True;
|
|
Py_INCREF(res);
|
|
}
|
|
return (res);
|
|
}
|
|
|
|
|
|
/* err_string(const char* message)
|
|
*
|
|
* Sets the error string for an exception of type ParserError.
|
|
*
|
|
*/
|
|
static void
|
|
err_string(const char *message)
|
|
{
|
|
PyErr_SetString(parser_error, message);
|
|
}
|
|
|
|
|
|
/* PyObject* parser_do_parse(PyObject* args, int type)
|
|
*
|
|
* Internal function to actually execute the parse and return the result if
|
|
* successful or set an exception if not.
|
|
*
|
|
*/
|
|
/* Shared implementation of parser.expr() and parser.suite().
 *
 * `argspec` is the PyArg format used to fetch the "source" string and
 * `type` is PyST_EXPR or PyST_SUITE; PyST_EXPR selects the eval_input
 * start symbol, anything else selects file_input.  On success the
 * parser flags (masked with PyCF_MASK) are stored in the new ST object.
 *
 * Returns a new ST object, or NULL with an exception set.
 */
static PyObject*
parser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type)
{
    static char *keywords[] = {"source", NULL};
    char *source = NULL;
    PyObject *st = NULL;
    node *tree;
    perrdetail err;
    int flags = 0;
    int start;

    if (!PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &source))
        return NULL;

    start = (type == PyST_EXPR) ? eval_input : file_input;
    tree = PyParser_ParseStringFlagsFilenameEx(source, NULL,
                                               &_PyParser_Grammar,
                                               start, &err, &flags);
    if (tree == NULL) {
        PyParser_SetError(&err);
    }
    else {
        st = parser_newstobject(tree, type);
        if (st != NULL)
            ((PyST_Object *)st)->st_flags.cf_flags = flags & PyCF_MASK;
    }
    PyParser_ClearError(&err);
    return st;
}
|
|
|
|
|
|
/* PyObject* parser_expr(PyObject* self, PyObject* args)
|
|
* PyObject* parser_suite(PyObject* self, PyObject* args)
|
|
*
|
|
* External interfaces to the parser itself. Which is called determines if
|
|
* the parser attempts to recognize an expression ('eval' form) or statement
|
|
* suite ('exec' form). The real work is done by parser_do_parse() above.
|
|
*
|
|
*/
|
|
static PyObject*
|
|
parser_expr(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
NOTE(ARGUNUSED(self))
|
|
return (parser_do_parse(args, kw, "s:expr", PyST_EXPR));
|
|
}
|
|
|
|
|
|
static PyObject*
|
|
parser_suite(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
NOTE(ARGUNUSED(self))
|
|
return (parser_do_parse(args, kw, "s:suite", PyST_SUITE));
|
|
}
|
|
|
|
|
|
|
|
/* This is the messy part of the code. Conversion from a tuple to an ST
|
|
* object requires that the input tuple be valid without having to rely on
|
|
* catching an exception from the compiler. This is done to allow the
|
|
* compiler itself to remain fast, since most of its input will come from
|
|
* the parser directly, and therefore be known to be syntactically correct.
|
|
* This validation is done to ensure that we don't core dump the compile
|
|
* phase, returning an exception instead.
|
|
*
|
|
* Two aspects can be broken out in this code: creating a node tree from
|
|
* the tuple passed in, and verifying that it is indeed valid. It may be
|
|
* advantageous to expand the number of ST types to include funcdefs and
|
|
* lambdadefs to take advantage of the optimizer, recognizing those STs
|
|
* here. They are not necessary, and not quite as useful in a raw form.
|
|
* For now, let's get expressions and suites working reliably.
|
|
*/
|
|
|
|
|
|
static node* build_node_tree(PyObject *tuple);
|
|
|
|
static int
|
|
validate_node(node *tree)
|
|
{
|
|
int type = TYPE(tree);
|
|
int nch = NCH(tree);
|
|
dfa *nt_dfa;
|
|
state *dfa_state;
|
|
int pos, arc;
|
|
|
|
assert(ISNONTERMINAL(type));
|
|
type -= NT_OFFSET;
|
|
if (type >= _PyParser_Grammar.g_ndfas) {
|
|
PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
|
|
return 0;
|
|
}
|
|
nt_dfa = &_PyParser_Grammar.g_dfa[type];
|
|
REQ(tree, nt_dfa->d_type);
|
|
|
|
/* Run the DFA for this nonterminal. */
|
|
dfa_state = &nt_dfa->d_state[nt_dfa->d_initial];
|
|
for (pos = 0; pos < nch; ++pos) {
|
|
node *ch = CHILD(tree, pos);
|
|
int ch_type = TYPE(ch);
|
|
for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
|
|
short a_label = dfa_state->s_arc[arc].a_lbl;
|
|
assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
|
|
if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) {
|
|
/* The child is acceptable; if non-terminal, validate it recursively. */
|
|
if (ISNONTERMINAL(ch_type) && !validate_node(ch))
|
|
return 0;
|
|
|
|
/* Update the state, and move on to the next child. */
|
|
dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
|
|
goto arc_found;
|
|
}
|
|
}
|
|
/* What would this state have accepted? */
|
|
{
|
|
short a_label = dfa_state->s_arc->a_lbl;
|
|
int next_type;
|
|
if (!a_label) /* Wouldn't accept any more children */
|
|
goto illegal_num_children;
|
|
|
|
next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
|
|
if (ISNONTERMINAL(next_type))
|
|
PyErr_Format(parser_error, "Expected node type %d, got %d.",
|
|
next_type, ch_type);
|
|
else
|
|
PyErr_Format(parser_error, "Illegal terminal: expected %s.",
|
|
_PyParser_TokenNames[next_type]);
|
|
return 0;
|
|
}
|
|
|
|
arc_found:
|
|
continue;
|
|
}
|
|
/* Are we in a final state? If so, return 1 for successful validation. */
|
|
for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
|
|
if (!dfa_state->s_arc[arc].a_lbl) {
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
illegal_num_children:
|
|
PyErr_Format(parser_error,
|
|
"Illegal number of children for %s node.", nt_dfa->d_name);
|
|
return 0;
|
|
}
|
|
|
|
/* PyObject* parser_tuple2st(PyObject* self, PyObject* args)
|
|
*
|
|
* This is the public function, called from the Python code. It receives a
|
|
* single tuple object from the caller, and creates an ST object if the
|
|
* tuple can be validated. It does this by checking the first code of the
|
|
* tuple, and, if acceptable, builds the internal representation. If this
|
|
* step succeeds, the internal representation is validated as fully as
|
|
* possible with the recursive validate_node() routine defined above.
|
|
*
|
|
* This function must be changed if support is to be added for PyST_FRAGMENT
|
|
* ST objects.
|
|
*
|
|
*/
|
|
static PyObject*
|
|
parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
|
|
{
|
|
NOTE(ARGUNUSED(self))
|
|
PyObject *st = 0;
|
|
PyObject *tuple;
|
|
node *tree;
|
|
|
|
static char *keywords[] = {"sequence", NULL};
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords,
|
|
&tuple))
|
|
return (0);
|
|
if (!PySequence_Check(tuple)) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"sequence2st() requires a single sequence argument");
|
|
return (0);
|
|
}
|
|
/*
|
|
* Convert the tree to the internal form before checking it.
|
|
*/
|
|
tree = build_node_tree(tuple);
|
|
if (tree != 0) {
|
|
node *validation_root = NULL;
|
|
int tree_type = 0;
|
|
switch (TYPE(tree)) {
|
|
case eval_input:
|
|
/* Might be an eval form. */
|
|
tree_type = PyST_EXPR;
|
|
validation_root = tree;
|
|
break;
|
|
case encoding_decl:
|
|
/* This looks like an encoding_decl so far. */
|
|
if (NCH(tree) == 1) {
|
|
tree_type = PyST_SUITE;
|
|
validation_root = CHILD(tree, 0);
|
|
}
|
|
else {
|
|
err_string("Error Parsing encoding_decl");
|
|
}
|
|
break;
|
|
case file_input:
|
|
/* This looks like an exec form so far. */
|
|
tree_type = PyST_SUITE;
|
|
validation_root = tree;
|
|
break;
|
|
default:
|
|
/* This is a fragment, at best. */
|
|
err_string("parse tree does not use a valid start symbol");
|
|
}
|
|
|
|
if (validation_root != NULL && validate_node(validation_root))
|
|
st = parser_newstobject(tree, tree_type);
|
|
else
|
|
PyNode_Free(tree);
|
|
}
|
|
/* Make sure we raise an exception on all errors. We should never
|
|
* get this, but we'd do well to be sure something is done.
|
|
*/
|
|
if (st == NULL && !PyErr_Occurred())
|
|
err_string("unspecified ST error occurred");
|
|
|
|
return st;
|
|
}
|
|
|
|
|
|
/* node* build_node_children()
|
|
*
|
|
* Iterate across the children of the current non-terminal node and build
|
|
* their structures. If successful, return the root of this portion of
|
|
* the tree, otherwise, 0. Any required exception will be specified already,
|
|
* and no memory will have been deallocated.
|
|
*
|
|
*/
|
|
static node*
|
|
build_node_children(PyObject *tuple, node *root, int *line_num)
|
|
{
|
|
Py_ssize_t len = PyObject_Size(tuple);
|
|
Py_ssize_t i;
|
|
int err;
|
|
|
|
if (len < 0) {
|
|
return NULL;
|
|
}
|
|
for (i = 1; i < len; ++i) {
|
|
/* elem must always be a sequence, however simple */
|
|
PyObject* elem = PySequence_GetItem(tuple, i);
|
|
int ok = elem != NULL;
|
|
int type = 0;
|
|
char *strn = 0;
|
|
|
|
if (ok)
|
|
ok = PySequence_Check(elem);
|
|
if (ok) {
|
|
PyObject *temp = PySequence_GetItem(elem, 0);
|
|
if (temp == NULL)
|
|
ok = 0;
|
|
else {
|
|
ok = PyLong_Check(temp);
|
|
if (ok) {
|
|
type = _PyLong_AsInt(temp);
|
|
if (type == -1 && PyErr_Occurred()) {
|
|
Py_DECREF(temp);
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
}
|
|
Py_DECREF(temp);
|
|
}
|
|
}
|
|
if (!ok) {
|
|
PyObject *err = Py_BuildValue("Os", elem,
|
|
"Illegal node construct.");
|
|
PyErr_SetObject(parser_error, err);
|
|
Py_XDECREF(err);
|
|
Py_XDECREF(elem);
|
|
return NULL;
|
|
}
|
|
if (ISTERMINAL(type)) {
|
|
Py_ssize_t len = PyObject_Size(elem);
|
|
PyObject *temp;
|
|
const char *temp_str;
|
|
|
|
if ((len != 2) && (len != 3)) {
|
|
err_string("terminal nodes must have 2 or 3 entries");
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
temp = PySequence_GetItem(elem, 1);
|
|
if (temp == NULL) {
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
if (!PyUnicode_Check(temp)) {
|
|
PyErr_Format(parser_error,
|
|
"second item in terminal node must be a string,"
|
|
" found %s",
|
|
Py_TYPE(temp)->tp_name);
|
|
Py_DECREF(temp);
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
if (len == 3) {
|
|
PyObject *o = PySequence_GetItem(elem, 2);
|
|
if (o == NULL) {
|
|
Py_DECREF(temp);
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
if (PyLong_Check(o)) {
|
|
int num = _PyLong_AsInt(o);
|
|
if (num == -1 && PyErr_Occurred()) {
|
|
Py_DECREF(o);
|
|
Py_DECREF(temp);
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
*line_num = num;
|
|
}
|
|
else {
|
|
PyErr_Format(parser_error,
|
|
"third item in terminal node must be an"
|
|
" integer, found %s",
|
|
Py_TYPE(temp)->tp_name);
|
|
Py_DECREF(o);
|
|
Py_DECREF(temp);
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
Py_DECREF(o);
|
|
}
|
|
temp_str = PyUnicode_AsUTF8AndSize(temp, &len);
|
|
if (temp_str == NULL) {
|
|
Py_DECREF(temp);
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
strn = (char *)PyObject_MALLOC(len + 1);
|
|
if (strn == NULL) {
|
|
Py_DECREF(temp);
|
|
Py_DECREF(elem);
|
|
PyErr_NoMemory();
|
|
return NULL;
|
|
}
|
|
(void) memcpy(strn, temp_str, len + 1);
|
|
Py_DECREF(temp);
|
|
}
|
|
else if (!ISNONTERMINAL(type)) {
|
|
/*
|
|
* It has to be one or the other; this is an error.
|
|
* Raise an exception.
|
|
*/
|
|
PyObject *err = Py_BuildValue("Os", elem, "unknown node type.");
|
|
PyErr_SetObject(parser_error, err);
|
|
Py_XDECREF(err);
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
err = PyNode_AddChild(root, type, strn, *line_num, 0);
|
|
if (err == E_NOMEM) {
|
|
Py_DECREF(elem);
|
|
PyObject_FREE(strn);
|
|
PyErr_NoMemory();
|
|
return NULL;
|
|
}
|
|
if (err == E_OVERFLOW) {
|
|
Py_DECREF(elem);
|
|
PyObject_FREE(strn);
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"unsupported number of child nodes");
|
|
return NULL;
|
|
}
|
|
|
|
if (ISNONTERMINAL(type)) {
|
|
node* new_child = CHILD(root, i - 1);
|
|
|
|
if (new_child != build_node_children(elem, new_child, line_num)) {
|
|
Py_DECREF(elem);
|
|
return NULL;
|
|
}
|
|
}
|
|
else if (type == NEWLINE) { /* It's true: we increment the */
|
|
++(*line_num); /* line number *after* the newline! */
|
|
}
|
|
Py_DECREF(elem);
|
|
}
|
|
return root;
|
|
}
|
|
|
|
|
|
static node*
|
|
build_node_tree(PyObject *tuple)
|
|
{
|
|
node* res = 0;
|
|
PyObject *temp = PySequence_GetItem(tuple, 0);
|
|
long num = -1;
|
|
|
|
if (temp != NULL)
|
|
num = PyLong_AsLong(temp);
|
|
Py_XDECREF(temp);
|
|
if (ISTERMINAL(num)) {
|
|
/*
|
|
* The tuple is simple, but it doesn't start with a start symbol.
|
|
* Raise an exception now and be done with it.
|
|
*/
|
|
tuple = Py_BuildValue("Os", tuple,
|
|
"Illegal syntax-tree; cannot start with terminal symbol.");
|
|
PyErr_SetObject(parser_error, tuple);
|
|
Py_XDECREF(tuple);
|
|
}
|
|
else if (ISNONTERMINAL(num)) {
|
|
/*
|
|
* Not efficient, but that can be handled later.
|
|
*/
|
|
int line_num = 0;
|
|
PyObject *encoding = NULL;
|
|
|
|
if (num == encoding_decl) {
|
|
encoding = PySequence_GetItem(tuple, 2);
|
|
if (encoding == NULL) {
|
|
PyErr_SetString(parser_error, "missed encoding");
|
|
return NULL;
|
|
}
|
|
if (!PyUnicode_Check(encoding)) {
|
|
PyErr_Format(parser_error,
|
|
"encoding must be a string, found %.200s",
|
|
Py_TYPE(encoding)->tp_name);
|
|
Py_DECREF(encoding);
|
|
return NULL;
|
|
}
|
|
/* tuple isn't borrowed anymore here, need to DECREF */
|
|
tuple = PySequence_GetSlice(tuple, 0, 2);
|
|
if (tuple == NULL) {
|
|
Py_DECREF(encoding);
|
|
return NULL;
|
|
}
|
|
}
|
|
res = PyNode_New(num);
|
|
if (res != NULL) {
|
|
if (res != build_node_children(tuple, res, &line_num)) {
|
|
PyNode_Free(res);
|
|
res = NULL;
|
|
}
|
|
if (res && encoding) {
|
|
Py_ssize_t len;
|
|
const char *temp;
|
|
temp = PyUnicode_AsUTF8AndSize(encoding, &len);
|
|
if (temp == NULL) {
|
|
PyNode_Free(res);
|
|
Py_DECREF(encoding);
|
|
Py_DECREF(tuple);
|
|
return NULL;
|
|
}
|
|
res->n_str = (char *)PyObject_MALLOC(len + 1);
|
|
if (res->n_str == NULL) {
|
|
PyNode_Free(res);
|
|
Py_DECREF(encoding);
|
|
Py_DECREF(tuple);
|
|
PyErr_NoMemory();
|
|
return NULL;
|
|
}
|
|
(void) memcpy(res->n_str, temp, len + 1);
|
|
}
|
|
}
|
|
if (encoding != NULL) {
|
|
Py_DECREF(encoding);
|
|
Py_DECREF(tuple);
|
|
}
|
|
}
|
|
else {
|
|
/* The tuple is illegal -- if the number is neither TERMINAL nor
|
|
* NONTERMINAL, we can't use it. Not sure the implementation
|
|
* allows this condition, but the API doesn't preclude it.
|
|
*/
|
|
PyObject *err = Py_BuildValue("Os", tuple,
|
|
"Illegal component tuple.");
|
|
PyErr_SetObject(parser_error, err);
|
|
Py_XDECREF(err);
|
|
}
|
|
|
|
return (res);
|
|
}
|
|
|
|
|
|
static PyObject*
|
|
pickle_constructor = NULL;
|
|
|
|
|
|
/* parser._pickler(st): reduction helper registered with copyreg.
 *
 * Converts `st` to a tuple tree (with line information enabled) and
 * returns the pair (pickle_constructor, (tuple,)), or NULL with an
 * exception set.
 */
static PyObject*
parser__pickler(PyObject *self, PyObject *args)
{
    NOTE(ARGUNUSED(self))
    PyObject *result = NULL;
    PyObject *st = NULL;
    PyObject *empty_dict;
    PyObject *newargs;

    if (!PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st))
        return NULL;

    empty_dict = PyDict_New();
    if (empty_dict == NULL)
        return NULL;

    /* Call st2tuple(st, 1) through its regular argument-parsing path. */
    newargs = Py_BuildValue("Oi", st, 1);
    if (newargs != NULL) {
        PyObject *tuple = parser_st2tuple((PyST_Object*)NULL, newargs,
                                          empty_dict);

        if (tuple != NULL) {
            result = Py_BuildValue("O(O)", pickle_constructor, tuple);
            Py_DECREF(tuple);
        }
        Py_DECREF(newargs);
    }

    Py_DECREF(empty_dict);
    return (result);
}
|
|
|
|
|
|
/* Functions exported by this module. Most of this should probably
|
|
* be converted into an ST object with methods, but that is better
|
|
* done directly in Python, allowing subclasses to be created directly.
|
|
* We'd really have to write a wrapper around it all anyway to allow
|
|
* inheritance.
|
|
*/
|
|
static PyMethodDef parser_functions[] = {
|
|
{"compilest", (PyCFunction)parser_compilest, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Compiles an ST object into a code object.")},
|
|
{"expr", (PyCFunction)parser_expr, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates an ST object from an expression.")},
|
|
{"isexpr", (PyCFunction)parser_isexpr, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Determines if an ST object was created from an expression.")},
|
|
{"issuite", (PyCFunction)parser_issuite, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Determines if an ST object was created from a suite.")},
|
|
{"suite", (PyCFunction)parser_suite, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates an ST object from a suite.")},
|
|
{"sequence2st", (PyCFunction)parser_tuple2st, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates an ST object from a tree representation.")},
|
|
{"st2tuple", (PyCFunction)parser_st2tuple, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates a tuple-tree representation of an ST.")},
|
|
{"st2list", (PyCFunction)parser_st2list, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates a list-tree representation of an ST.")},
|
|
{"tuple2st", (PyCFunction)parser_tuple2st, PUBLIC_METHOD_TYPE,
|
|
PyDoc_STR("Creates an ST object from a tree representation.")},
|
|
|
|
/* private stuff: support pickle module */
|
|
{"_pickler", (PyCFunction)parser__pickler, METH_VARARGS,
|
|
PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")},
|
|
|
|
{NULL, NULL, 0, NULL}
|
|
};
|
|
|
|
|
|
|
|
static struct PyModuleDef parsermodule = {
|
|
PyModuleDef_HEAD_INIT,
|
|
"parser",
|
|
NULL,
|
|
-1,
|
|
parser_functions,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL
|
|
};
|
|
|
|
PyMODINIT_FUNC PyInit_parser(void); /* supply a prototype */
|
|
|
|
PyMODINIT_FUNC
|
|
PyInit_parser(void)
|
|
{
|
|
PyObject *module, *copyreg;
|
|
|
|
if (PyType_Ready(&PyST_Type) < 0)
|
|
return NULL;
|
|
module = PyModule_Create(&parsermodule);
|
|
if (module == NULL)
|
|
return NULL;
|
|
|
|
if (parser_error == 0)
|
|
parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
|
|
|
|
if (parser_error == 0)
|
|
return NULL;
|
|
/* CAUTION: The code next used to skip bumping the refcount on
|
|
* parser_error. That's a disaster if PyInit_parser() gets called more
|
|
* than once. By incref'ing, we ensure that each module dict that
|
|
* gets created owns its reference to the shared parser_error object,
|
|
* and the file static parser_error vrbl owns a reference too.
|
|
*/
|
|
Py_INCREF(parser_error);
|
|
if (PyModule_AddObject(module, "ParserError", parser_error) != 0)
|
|
return NULL;
|
|
|
|
Py_INCREF(&PyST_Type);
|
|
PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type);
|
|
|
|
PyModule_AddStringConstant(module, "__copyright__",
|
|
parser_copyright_string);
|
|
PyModule_AddStringConstant(module, "__doc__",
|
|
parser_doc_string);
|
|
PyModule_AddStringConstant(module, "__version__",
|
|
parser_version_string);
|
|
|
|
/* Register to support pickling.
|
|
* If this fails, the import of this module will fail because an
|
|
* exception will be raised here; should we clear the exception?
|
|
*/
|
|
copyreg = PyImport_ImportModuleNoBlock("copyreg");
|
|
if (copyreg != NULL) {
|
|
PyObject *func, *pickler;
|
|
_Py_IDENTIFIER(pickle);
|
|
_Py_IDENTIFIER(sequence2st);
|
|
_Py_IDENTIFIER(_pickler);
|
|
|
|
func = _PyObject_GetAttrId(copyreg, &PyId_pickle);
|
|
pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st);
|
|
pickler = _PyObject_GetAttrId(module, &PyId__pickler);
|
|
Py_XINCREF(pickle_constructor);
|
|
if ((func != NULL) && (pickle_constructor != NULL)
|
|
&& (pickler != NULL)) {
|
|
PyObject *res;
|
|
|
|
res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler,
|
|
pickle_constructor, NULL);
|
|
Py_XDECREF(res);
|
|
}
|
|
Py_XDECREF(func);
|
|
Py_XDECREF(pickle_constructor);
|
|
Py_XDECREF(pickler);
|
|
Py_DECREF(copyreg);
|
|
}
|
|
return module;
|
|
}
|