cpython/Python/Python-ast.c

8939 lines
281 KiB
C
Raw Normal View History

Merged revisions 53623-53858 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r53624 | peter.astrand | 2007-02-02 20:06:36 +0100 (Fri, 02 Feb 2007) | 1 line We had several if statements checking the value of a fd. This is unsafe, since valid fds might be zero. We should check for not None instead. ........ r53635 | kurt.kaiser | 2007-02-05 07:03:18 +0100 (Mon, 05 Feb 2007) | 2 lines Add 'raw' support to configHandler. Patch 1650174 Tal Einat. ........ r53641 | kurt.kaiser | 2007-02-06 00:02:16 +0100 (Tue, 06 Feb 2007) | 5 lines 1. Calltips now 'handle' tuples in the argument list (display '<tuple>' :) Suggested solution by Christos Georgiou, Bug 791968. 2. Clean up tests, were not failing when they should have been. 4. Remove some camelcase and an unneeded try/except block. ........ r53644 | kurt.kaiser | 2007-02-06 04:21:40 +0100 (Tue, 06 Feb 2007) | 2 lines Clean up ModifiedInterpreter.runcode() structure ........ r53646 | peter.astrand | 2007-02-06 16:37:50 +0100 (Tue, 06 Feb 2007) | 1 line Applied patch 1124861.3.patch to solve bug #1124861: Automatically create pipes on Windows, if GetStdHandle fails. Will backport. ........ r53648 | lars.gustaebel | 2007-02-06 19:38:13 +0100 (Tue, 06 Feb 2007) | 4 lines Patch #1652681: create nonexistent files in append mode and allow appending to empty files. ........ r53649 | kurt.kaiser | 2007-02-06 20:09:43 +0100 (Tue, 06 Feb 2007) | 4 lines Updated patch (CodeContext.061217.patch) to [ 1362975 ] CodeContext - Improved text indentation Tal Einat 16Dec06 ........ r53650 | kurt.kaiser | 2007-02-06 20:21:19 +0100 (Tue, 06 Feb 2007) | 2 lines narrow exception per [ 1540849 ] except too broad ........ r53653 | kurt.kaiser | 2007-02-07 04:39:41 +0100 (Wed, 07 Feb 2007) | 4 lines [ 1621265 ] Auto-completion list placement Move AC window below input line unless not enough space, then put it above. Patch: Tal Einat ........ 
r53654 | kurt.kaiser | 2007-02-07 09:07:13 +0100 (Wed, 07 Feb 2007) | 2 lines Handle AttributeError during calltip lookup ........ r53656 | raymond.hettinger | 2007-02-07 21:08:22 +0100 (Wed, 07 Feb 2007) | 3 lines SF #1615701: make d.update(m) honor __getitem__() and keys() in dict subclasses ........ r53658 | raymond.hettinger | 2007-02-07 22:04:20 +0100 (Wed, 07 Feb 2007) | 1 line SF: 1397711 Set docs conflated immutable and hashable ........ r53660 | raymond.hettinger | 2007-02-07 22:42:17 +0100 (Wed, 07 Feb 2007) | 1 line Check for a common user error with defaultdict(). ........ r53662 | raymond.hettinger | 2007-02-07 23:24:07 +0100 (Wed, 07 Feb 2007) | 1 line Bug #1575169: operator.isSequenceType() now returns False for subclasses of dict. ........ r53664 | raymond.hettinger | 2007-02-08 00:49:03 +0100 (Thu, 08 Feb 2007) | 1 line Silence compiler warning ........ r53666 | raymond.hettinger | 2007-02-08 01:07:32 +0100 (Thu, 08 Feb 2007) | 1 line Do not let overflows in enumerate() and count() pass silently. ........ r53668 | raymond.hettinger | 2007-02-08 01:50:39 +0100 (Thu, 08 Feb 2007) | 1 line Bypass set specific optimizations for set and frozenset subclasses. ........ r53670 | raymond.hettinger | 2007-02-08 02:42:35 +0100 (Thu, 08 Feb 2007) | 1 line Fix docstring bug ........ r53671 | martin.v.loewis | 2007-02-08 10:13:36 +0100 (Thu, 08 Feb 2007) | 3 lines Bug #1653736: Complain about keyword arguments to time.isoformat. Will backport to 2.5. ........ r53679 | kurt.kaiser | 2007-02-08 23:58:18 +0100 (Thu, 08 Feb 2007) | 6 lines Corrected some bugs in AutoComplete. Also, Page Up/Down in ACW implemented; mouse and cursor selection in ACWindow implemented; double Tab inserts current selection and closes ACW (similar to double-click and Return); scroll wheel now works in ACW. Added AutoComplete instructions to IDLE Help. ........ 
r53689 | martin.v.loewis | 2007-02-09 13:19:32 +0100 (Fri, 09 Feb 2007) | 3 lines Bug #1653736: Properly discard third argument to slot_nb_inplace_power. Will backport. ........ r53691 | martin.v.loewis | 2007-02-09 13:36:48 +0100 (Fri, 09 Feb 2007) | 4 lines Bug #1600860: Search for shared python library in LIBDIR, not lib/python/config, on "linux" and "gnu" systems. Will backport. ........ r53693 | martin.v.loewis | 2007-02-09 13:58:49 +0100 (Fri, 09 Feb 2007) | 2 lines Update broken link. Will backport to 2.5. ........ r53697 | georg.brandl | 2007-02-09 19:48:41 +0100 (Fri, 09 Feb 2007) | 2 lines Bug #1656078: typo in in profile docs. ........ r53731 | brett.cannon | 2007-02-11 06:36:00 +0100 (Sun, 11 Feb 2007) | 3 lines Change a very minor inconsistency (that is purely cosmetic) in the AST definition. ........ r53735 | skip.montanaro | 2007-02-11 19:24:37 +0100 (Sun, 11 Feb 2007) | 1 line fix trace.py --ignore-dir ........ r53741 | brett.cannon | 2007-02-11 20:44:41 +0100 (Sun, 11 Feb 2007) | 3 lines Check in changed Python-ast.c from a cosmetic change to Python.asdl (in r53731). ........ r53751 | brett.cannon | 2007-02-12 04:51:02 +0100 (Mon, 12 Feb 2007) | 5 lines Modify Parser/asdl_c.py so that the __version__ number for Python/Python-ast.c is specified at the top of the file. Also add a note that Python/Python-ast.c needs to be committed separately after a change to the AST grammar to capture the revision number of the change (which is what __version__ is set to). ........ r53752 | lars.gustaebel | 2007-02-12 10:25:53 +0100 (Mon, 12 Feb 2007) | 3 lines Bug #1656581: Point out that external file objects are supposed to be at position 0. ........ r53754 | martin.v.loewis | 2007-02-12 13:21:10 +0100 (Mon, 12 Feb 2007) | 3 lines Patch 1463026: Support default namespace in XMLGenerator. Fixes #847665. Will backport. ........ 
r53757 | armin.rigo | 2007-02-12 17:23:24 +0100 (Mon, 12 Feb 2007) | 4 lines Fix the line to what is my guess at the original author's meaning. (The line has no effect anyway, but is present because it's customary call the base class __init__). ........ r53763 | martin.v.loewis | 2007-02-13 09:34:45 +0100 (Tue, 13 Feb 2007) | 3 lines Patch #685268: Consider a package's __path__ in imputil. Will backport. ........ r53765 | martin.v.loewis | 2007-02-13 10:49:38 +0100 (Tue, 13 Feb 2007) | 2 lines Patch #698833: Support file decryption in zipfile. ........ r53766 | martin.v.loewis | 2007-02-13 11:10:39 +0100 (Tue, 13 Feb 2007) | 3 lines Patch #1517891: Make 'a' create the file if it doesn't exist. Fixes #1514451. ........ r53767 | martin.v.loewis | 2007-02-13 13:08:24 +0100 (Tue, 13 Feb 2007) | 3 lines Bug #1658794: Remove extraneous 'this'. Will backport to 2.5. ........ r53769 | martin.v.loewis | 2007-02-13 13:14:19 +0100 (Tue, 13 Feb 2007) | 3 lines Patch #1657276: Make NETLINK_DNRTMSG conditional. Will backport. ........ r53771 | lars.gustaebel | 2007-02-13 17:09:24 +0100 (Tue, 13 Feb 2007) | 4 lines Patch #1647484: Renamed GzipFile's filename attribute to name. The filename attribute is still accessible as a property that emits a DeprecationWarning. ........ r53772 | lars.gustaebel | 2007-02-13 17:24:00 +0100 (Tue, 13 Feb 2007) | 3 lines Strip the '.gz' extension from the filename that is written to the gzip header. ........ r53774 | martin.v.loewis | 2007-02-14 11:07:37 +0100 (Wed, 14 Feb 2007) | 2 lines Patch #1432399: Add HCI sockets. ........ r53775 | martin.v.loewis | 2007-02-14 12:30:07 +0100 (Wed, 14 Feb 2007) | 2 lines Update 1432399 to removal of _BT_SOCKADDR_MEMB. ........ r53776 | martin.v.loewis | 2007-02-14 12:30:56 +0100 (Wed, 14 Feb 2007) | 3 lines Ignore directory time stamps when considering whether to rerun libffi configure. ........ 
r53778 | lars.gustaebel | 2007-02-14 15:45:12 +0100 (Wed, 14 Feb 2007) | 4 lines A missing binary mode in AppendTest caused failures in Windows Buildbot. ........ r53782 | martin.v.loewis | 2007-02-15 10:51:35 +0100 (Thu, 15 Feb 2007) | 2 lines Patch #1397848: add the reasoning behind no-resize-on-shrinkage. ........ r53783 | georg.brandl | 2007-02-15 11:37:59 +0100 (Thu, 15 Feb 2007) | 2 lines Make functools.wraps() docs a bit clearer. ........ r53785 | georg.brandl | 2007-02-15 12:29:04 +0100 (Thu, 15 Feb 2007) | 2 lines Patch #1494140: Add documentation for the new struct.Struct object. ........ r53787 | georg.brandl | 2007-02-15 12:29:55 +0100 (Thu, 15 Feb 2007) | 2 lines Add missing \versionadded. ........ r53800 | brett.cannon | 2007-02-15 23:54:39 +0100 (Thu, 15 Feb 2007) | 11 lines Update the encoding package's search function to use absolute imports when calling __import__. This helps make the expected search locations for encoding modules be more explicit. One could use an explicit value for __path__ when making the call to __import__ to force the exact location searched for encodings. This would give the most strict search path possible if one is worried about malicious code being imported. The unfortunate side-effect of that is that if __path__ was modified on 'encodings' on purpose in a safe way it would not be picked up in future __import__ calls. ........ r53801 | brett.cannon | 2007-02-16 20:33:01 +0100 (Fri, 16 Feb 2007) | 2 lines Make the __import__ call in encodings.__init__ absolute with a level 0 call. ........ r53809 | vinay.sajip | 2007-02-16 23:36:24 +0100 (Fri, 16 Feb 2007) | 1 line Minor fix for currentframe (SF #1652788). ........ r53818 | raymond.hettinger | 2007-02-19 03:03:19 +0100 (Mon, 19 Feb 2007) | 3 lines Extend work on revision 52962: Eliminate redundant calls to PyObject_Hash(). ........ r53820 | raymond.hettinger | 2007-02-19 05:08:43 +0100 (Mon, 19 Feb 2007) | 1 line Add merge() function to heapq. ........ 
r53821 | raymond.hettinger | 2007-02-19 06:28:28 +0100 (Mon, 19 Feb 2007) | 1 line Add tie-breaker count to preserve sort stability. ........ r53822 | raymond.hettinger | 2007-02-19 07:59:32 +0100 (Mon, 19 Feb 2007) | 1 line Use C heapreplace() instead of slower _siftup() in pure python. ........ r53823 | raymond.hettinger | 2007-02-19 08:30:21 +0100 (Mon, 19 Feb 2007) | 1 line Add test for merge stability ........ r53824 | raymond.hettinger | 2007-02-19 10:14:10 +0100 (Mon, 19 Feb 2007) | 1 line Provide an example of defaultdict with non-zero constant factory function. ........ r53825 | lars.gustaebel | 2007-02-19 10:54:47 +0100 (Mon, 19 Feb 2007) | 2 lines Moved misplaced news item. ........ r53826 | martin.v.loewis | 2007-02-19 11:55:19 +0100 (Mon, 19 Feb 2007) | 3 lines Patch #1490190: posixmodule now includes os.chflags() and os.lchflags() functions on platforms where the underlying system calls are available. ........ r53827 | raymond.hettinger | 2007-02-19 19:15:04 +0100 (Mon, 19 Feb 2007) | 1 line Fixup docstrings for merge(). ........ r53829 | raymond.hettinger | 2007-02-19 21:44:04 +0100 (Mon, 19 Feb 2007) | 1 line Fixup set/dict interoperability. ........ r53837 | raymond.hettinger | 2007-02-21 06:20:38 +0100 (Wed, 21 Feb 2007) | 1 line Add itertools.izip_longest(). ........ r53838 | raymond.hettinger | 2007-02-21 18:22:05 +0100 (Wed, 21 Feb 2007) | 1 line Remove filler struct item and fix leak. ........
2007-02-23 23:07:44 +08:00
/* File automatically generated by Parser/asdl_c.py. */
2012-03-13 00:46:44 +08:00
#include <stddef.h>
#include "Python.h"
#include "Python-ast.h"
static PyTypeObject AST_type;
static PyTypeObject *mod_type;
static PyObject* ast2obj_mod(void*);
static PyTypeObject *Module_type;
_Py_IDENTIFIER(body);
_Py_IDENTIFIER(type_ignores);
/* Field names for Module. */
static char *Module_fields[] = {
    "body",
    "type_ignores",
};
static PyTypeObject *Interactive_type;
/* Field names for Interactive. */
static char *Interactive_fields[] = {
    "body",
};
static PyTypeObject *Expression_type;
/* Field names for Expression. */
static char *Expression_fields[] = {
    "body",
};
static PyTypeObject *FunctionType_type;
_Py_IDENTIFIER(argtypes);
_Py_IDENTIFIER(returns);
/* Field names for FunctionType. */
static char *FunctionType_fields[] = {
    "argtypes",
    "returns",
};
static PyTypeObject *Suite_type;
/* Field names for Suite. */
static char *Suite_fields[] = {
    "body",
};
static PyTypeObject *stmt_type;
_Py_IDENTIFIER(lineno);
_Py_IDENTIFIER(col_offset);
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
_Py_IDENTIFIER(end_lineno);
_Py_IDENTIFIER(end_col_offset);
/* Attribute names for stmt: the start position (lineno, col_offset)
 * followed by the end position (end_lineno, end_col_offset). */
static char *stmt_attributes[] = {
    "lineno",
    "col_offset",
    "end_lineno",
    "end_col_offset",
};
static PyObject* ast2obj_stmt(void*);
static PyTypeObject *FunctionDef_type;
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(args);
_Py_IDENTIFIER(decorator_list);
_Py_IDENTIFIER(type_comment);
/* Field names for FunctionDef. */
static char *FunctionDef_fields[] = {
    "name",
    "args",
    "body",
    "decorator_list",
    "returns",
    "type_comment",
};
static PyTypeObject *AsyncFunctionDef_type;
/* Field names for AsyncFunctionDef. */
static char *AsyncFunctionDef_fields[] = {
    "name",
    "args",
    "body",
    "decorator_list",
    "returns",
    "type_comment",
};
static PyTypeObject *ClassDef_type;
_Py_IDENTIFIER(bases);
_Py_IDENTIFIER(keywords);
/* Field names for ClassDef. */
static char *ClassDef_fields[] = {
    "name",
    "bases",
    "keywords",
    "body",
    "decorator_list",
};
static PyTypeObject *Return_type;
_Py_IDENTIFIER(value);
/* Field names for Return. */
static char *Return_fields[] = {
    "value",
};
static PyTypeObject *Delete_type;
_Py_IDENTIFIER(targets);
/* Field names for Delete. */
static char *Delete_fields[] = {
    "targets",
};
static PyTypeObject *Assign_type;
/* Field names for Assign. */
static char *Assign_fields[] = {
    "targets",
    "value",
    "type_comment",
};
static PyTypeObject *AugAssign_type;
_Py_IDENTIFIER(target);
_Py_IDENTIFIER(op);
/* Field names for AugAssign. */
static char *AugAssign_fields[] = {
    "target",
    "op",
    "value",
};
static PyTypeObject *AnnAssign_type;
_Py_IDENTIFIER(annotation);
_Py_IDENTIFIER(simple);
/* Field names for AnnAssign. */
static char *AnnAssign_fields[] = {
    "target",
    "annotation",
    "value",
    "simple",
};
static PyTypeObject *For_type;
_Py_IDENTIFIER(iter);
_Py_IDENTIFIER(orelse);
/* Field names for For. */
static char *For_fields[] = {
    "target",
    "iter",
    "body",
    "orelse",
    "type_comment",
};
static PyTypeObject *AsyncFor_type;
/* Field names for AsyncFor. */
static char *AsyncFor_fields[] = {
    "target",
    "iter",
    "body",
    "orelse",
    "type_comment",
};
static PyTypeObject *While_type;
_Py_IDENTIFIER(test);
/* Field names for While. */
static char *While_fields[] = {
    "test",
    "body",
    "orelse",
};
static PyTypeObject *If_type;
/* Field names for If. */
static char *If_fields[] = {
    "test",
    "body",
    "orelse",
};
static PyTypeObject *With_type;
_Py_IDENTIFIER(items);
/* Field names for With. */
static char *With_fields[] = {
    "items",
    "body",
    "type_comment",
};
static PyTypeObject *AsyncWith_type;
/* Field names for AsyncWith. */
static char *AsyncWith_fields[] = {
    "items",
    "body",
    "type_comment",
};
static PyTypeObject *Raise_type;
_Py_IDENTIFIER(exc);
_Py_IDENTIFIER(cause);
/* Field names for Raise. */
static char *Raise_fields[] = {
    "exc",
    "cause",
};
static PyTypeObject *Try_type;
_Py_IDENTIFIER(handlers);
_Py_IDENTIFIER(finalbody);
/* Field names for Try. */
static char *Try_fields[] = {
    "body",
    "handlers",
    "orelse",
    "finalbody",
};
static PyTypeObject *Assert_type;
_Py_IDENTIFIER(msg);
/* Field names for Assert. */
static char *Assert_fields[] = {
    "test",
    "msg",
};
static PyTypeObject *Import_type;
_Py_IDENTIFIER(names);
/* Field names for Import. */
static char *Import_fields[] = {
    "names",
};
static PyTypeObject *ImportFrom_type;
_Py_IDENTIFIER(module);
_Py_IDENTIFIER(level);
/* Field names for ImportFrom. */
static char *ImportFrom_fields[] = {
    "module",
    "names",
    "level",
};
static PyTypeObject *Global_type;
/* Field names for Global. */
static char *Global_fields[] = {
    "names",
};
static PyTypeObject *Nonlocal_type;
/* Field names for Nonlocal. */
static char *Nonlocal_fields[] = {
    "names",
};
static PyTypeObject *Expr_type;
/* Field names for Expr. */
static char *Expr_fields[] = {
    "value",
};
static PyTypeObject *Pass_type;
static PyTypeObject *Break_type;
static PyTypeObject *Continue_type;
static PyTypeObject *expr_type;
/* Attribute names for expr: the start position (lineno, col_offset)
 * followed by the end position (end_lineno, end_col_offset). */
static char *expr_attributes[] = {
    "lineno",
    "col_offset",
    "end_lineno",
    "end_col_offset",
};
static PyObject* ast2obj_expr(void*);
static PyTypeObject *BoolOp_type;
_Py_IDENTIFIER(values);
/* Field names for BoolOp. */
static char *BoolOp_fields[] = {
    "op",
    "values",
};
bpo-35224: PEP 572 Implementation (#10497) * Add tokenization of := - Add token to Include/token.h. Add token to documentation in Doc/library/token.rst. - Run `./python Lib/token.py` to regenerate Lib/token.py. - Update Parser/tokenizer.c: add case to handle `:=`. * Add initial usage of := in grammar. * Update Python.asdl to match the grammar updates. Regenerated Include/Python-ast.h and Python/Python-ast.c * Update AST and compiler files in Python/ast.c and Python/compile.c. Basic functionality, this isn't scoped properly * Regenerate Lib/symbol.py using `./python Lib/symbol.py` * Tests - Fix failing tests in test_parser.py due to changes in token numbers for internal representation * Tests - Add simple test for := token * Tests - Add simple tests for named expressions using expr and suite * Tests - Update number of levels for nested expressions to prevent stack overflow * Update symbol table to handle NamedExpr * Update Grammar to allow assignment expressions in if statements. Regenerate Python/graminit.c accordingly using `make regen-grammar` * Tests - Add additional tests for named expressions in RoundtripLegalSyntaxTestCase, based on examples and information directly from PEP 572 Note: failing tests are currently commented out (4 out of 24 tests currently fail) * Tests - Add temporary syntax test failure tests in test_parser.py Note: There is an outstanding TODO for this -- syntax tests need to be moved to a different file (presumably test_syntax.py), but this is covering what needs to be tested at the moment, and it's more convenient to run a single test for the time being * Add support for allowing assignment expressions as function argument annotations. Uncomment tests for these cases because they all pass now! * Tests - Move existing syntax tests out of test_parser.py and into test_named_expressions.py. 
Refactor syntax tests to use unittest * Add TargetScopeError exception to extend SyntaxError Note: This simply creates the TargetScopeError exception, it is not yet used anywhere * Tests - Update tests per PEP 572 Continue refactoring test suite: The named expression test suite now checks for any invalid cases that throw exceptions (no longer limited to SyntaxErrors), assignment tests to ensure that variables are properly assigned, and scope tests to ensure that variable availability and values are correct Note: - There are still tests that are marked to skip, as they are not yet implemented - There are approximately 300 lines of the PEP that have not yet been addressed, though these may be deferred * Documentation - Small updates to XXX/todo comments - Remove XXX from child description in ast.c - Add comment with number of previously supported nested expressions for 3.7.X in test_parser.py * Fix assert in seq_for_testlist() * Cleanup - Denote "Not implemented -- No keyword args" on failing test case. Fix PEP8 error for blank lines at beginning of test classes in test_parser.py * Tests - Wrap all file opens in `with...as` to ensure files are closed * WIP: handle f(a := 1) * Tests and Cleanup - No longer skips keyword arg test. Keyword arg test now uses a simpler test case and does not rely on an external file. Remove print statements from ast.c * Tests - Refactor last remaining test case that relied on on external file to use a simpler test case without the dependency * Tests - Add better description of remaning skipped tests. Add test checking scope when using assignment expression in a function argument * Tests - Add test for nested comprehension, testing value and scope. Fix variable name in skipped comprehension scope test * Handle restriction of LHS for named expressions - can only assign to LHS of type NAME. Specifically, restrict assignment to tuples This adds an alternative set_context specifically for named expressions, set_namedexpr_context. 
Thus, context is now set differently for standard assignment versus assignment for named expressions in order to handle restrictions. * Tests - Update negative test case for assigning to lambda to match new error message. Add negative test case for assigning to tuple * Tests - Reorder test cases to group invalid syntax cases and named assignment target errors * Tests - Update test case for named expression in function argument - check that result and variable are set correctly * Todo - Add todo for TargetScopeError based on Guido's comment (https://github.com/python/cpython/commit/2b3acd37bdfc2d35e5094228c6684050d2aa8b0a#r30472562) * Tests - Add named expression tests for assignment operator in function arguments Note: One of two tests are skipped, as function arguments are currently treating an assignment expression inside of parenthesis as one child, which does not properly catch the named expression, nor does it count arguments properly * Add NamedStore to expr_context. Regenerate related code with `make regen-ast` * Add usage of NamedStore to ast_for_named_expr in ast.c. Update occurances of checking for Store to also handle NamedStore where appropriate * Add ste_comprehension to _symtable_entry to track if the namespace is a comprehension. Initialize ste_comprehension to 0. Set set_comprehension to 1 in symtable_handle_comprehension * s/symtable_add_def/symtable_add_def_helper. Add symtable_add_def to handle grabbing st->st_cur and passing it to symtable_add_def_helper. This now allows us to call the original code from symtable_add_def by instead calling symtable_add_def_helper with a different ste. * Refactor symtable_record_directive to take lineno and col_offset as arguments instead of stmt_ty. This allows symtable_record_directive to be used for stmt_ty and expr_ty * Handle elevating scope for named expressions in comprehensions. * Handle error for usage of named expression inside a class block * Tests - No longer skip scope tests. 
Add additional scope tests * Cleanup - Update error message for named expression within a comprehension within a class. Update comments. Add assert for symtable_extend_namedexpr_scope to validate that we always find at least a ModuleScope if we don't find a Class or FunctionScope * Cleanup - Add missing case for NamedStore in expr_context_name. Remove unused var in set_namedexpr_content * Refactor - Consolidate set_context and set_namedexpr_context to reduce duplicated code. Special cases for named expressions are handled by checking if ctx is NamedStore * Cleanup - Add additional use cases for ast_for_namedexpr in usage comment. Fix multiple blank lines in test_named_expressions * Tests - Remove unnecessary test case. Renumber test case function names * Remove TargetScopeError for now. Will add back if needed * Cleanup - Small comment nit for consistency * Handle positional argument check with named expression * Add TargetScopeError exception definition. Add documentation for TargetScopeError in c-api docs. Throw TargetScopeError instead of SyntaxError when using a named expression in a comprehension within a class scope * Increase stack size for parser by 200. This is a minimal change (approx. 5kb) and should not have an impact on any systems. 
Update parser test to allow 99 nested levels again * Add TargetScopeError to exception_hierarchy.txt for test_baseexception.py_ * Tests - Major update for named expression tests, both in test_named_expressions and test_parser - Add test for TargetScopeError - Add tests for named expressions in comprehension scope and edge cases - Add tests for named expressions in function arguments (declarations and call sites) - Reorganize tests to group them more logically * Cleanup - Remove unnecessary comment * Cleanup - Comment nitpicks * Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0) - Add check for LHS types to detect a parenthesis then a name (see note) - Add test for this scenario - Update tests for changed error message for named assignment to a tuple (also, see note) Note: This caused issues with the previous error handling for named assignment to a LHS that contained an expression, such as a tuple. Thus, the check for the LHS of a named expression must be changed to be more specific if we wish to maintain the previous error messages * Cleanup - Wrap lines more strictly in test file * Revert "Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0)" This reverts commit f1531400ca7d7a2d148830c8ac703f041740896d. * Add NEWS.d entry * Tests - Fix error in test_pickle.test_exceptions by adding TargetScopeError to list of exceptions * Tests - Update error message tests to reflect improved messaging convention (s/can't/cannot) * Remove cases that cannot be reached in compile.c. Small linting update. * Update Grammar/Tokens to add COLONEQUAL. Regenerate all files * Update TargetScopeError PRE_INIT and POST_INIT, as this was purposefully left out when fixing rebase conflicts * Add NamedStore back and regenerate files * Pass along line number and end col info for named expression * Simplify News entry * Fix compiler warning and explicity mark fallthrough
2019-01-25 07:49:56 +08:00
static PyTypeObject *NamedExpr_type;
/* Field names for NamedExpr. */
static char *NamedExpr_fields[] = {
    "target",
    "value",
};
static PyTypeObject *BinOp_type;
_Py_IDENTIFIER(left);
_Py_IDENTIFIER(right);
/* Field names for BinOp. */
static char *BinOp_fields[] = {
    "left",
    "op",
    "right",
};
static PyTypeObject *UnaryOp_type;
_Py_IDENTIFIER(operand);
/* Field names for UnaryOp. */
static char *UnaryOp_fields[] = {
    "op",
    "operand",
};
static PyTypeObject *Lambda_type;
/* Field names for Lambda. */
static char *Lambda_fields[] = {
    "args",
    "body",
};
static PyTypeObject *IfExp_type;
/* Field names for IfExp. */
static char *IfExp_fields[] = {
    "test",
    "body",
    "orelse",
};
static PyTypeObject *Dict_type;
_Py_IDENTIFIER(keys);
/* Field names for Dict. */
static char *Dict_fields[] = {
    "keys",
    "values",
};
static PyTypeObject *Set_type;
_Py_IDENTIFIER(elts);
/* Field names for Set. */
static char *Set_fields[] = {
    "elts",
};
static PyTypeObject *ListComp_type;
_Py_IDENTIFIER(elt);
_Py_IDENTIFIER(generators);
/* Field names for ListComp. */
static char *ListComp_fields[] = {
    "elt",
    "generators",
};
static PyTypeObject *SetComp_type;
/* Field names for SetComp. */
static char *SetComp_fields[] = {
    "elt",
    "generators",
};
Merged revisions 56154-56264 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r56155 | neal.norwitz | 2007-07-03 08:59:08 +0300 (Tue, 03 Jul 2007) | 1 line Get this test working after converting map to return an iterator ................ r56202 | neal.norwitz | 2007-07-09 04:30:09 +0300 (Mon, 09 Jul 2007) | 37 lines Merged revisions 56124-56201 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r56129 | georg.brandl | 2007-06-30 04:01:01 -0700 (Sat, 30 Jun 2007) | 2 lines Document smtp.SMTPAuthenticationError. ........ r56137 | georg.brandl | 2007-07-01 01:11:35 -0700 (Sun, 01 Jul 2007) | 2 lines Fix a few webbrowser.py problems. ........ r56143 | georg.brandl | 2007-07-02 04:54:28 -0700 (Mon, 02 Jul 2007) | 2 lines Remove duplicate sentence from alarm() doc. ........ r56170 | mark.hammond | 2007-07-03 19:03:10 -0700 (Tue, 03 Jul 2007) | 3 lines copy built files to the PCBuild directory, where tools like distutils or external build processes can find them. ........ r56176 | kurt.kaiser | 2007-07-05 15:03:39 -0700 (Thu, 05 Jul 2007) | 10 lines Many calls to tk.call involve an arglist containing a single tuple. Calls using METH_OLDARGS unpack this tuple; calls using METH_VARARG don't. Tcl's concatenation of args was affected; IDLE doesn't start. Modify Tkapp_Call() to unpack single tuple arglists. Bug 1733943 Ref http://mail.python.org/pipermail/python-checkins/2007-May/060454.html ........ r56177 | neal.norwitz | 2007-07-05 21:13:39 -0700 (Thu, 05 Jul 2007) | 1 line Fix typo in comment ........ ................ r56251 | neal.norwitz | 2007-07-11 10:01:01 +0300 (Wed, 11 Jul 2007) | 1 line Get working with map returning an iterator (had to fix whitespace too) ................ 
r56255 | thomas.wouters | 2007-07-11 13:41:37 +0300 (Wed, 11 Jul 2007) | 6 lines Clean up merge glitch or copy-paste error (the entire module was duplicated, except the first half even had some more copy-paste errors, referring to listcomps and genexps instead of setcomps) ................ r56256 | thomas.wouters | 2007-07-11 15:16:01 +0300 (Wed, 11 Jul 2007) | 14 lines Dict comprehensions. Still needs doc changes (like many python-3000 features ;-). It generates bytecode similar to: x = {} for k, v in (generator here): x[k] = v except there is no tuple-packing and -unpacking involved. Trivial measurement suggests it's significantly faster than dict(generator here) (in the order of 2 to 3 times as fast) but I have not done extensive measurements. ................ r56263 | guido.van.rossum | 2007-07-11 15:36:26 +0300 (Wed, 11 Jul 2007) | 3 lines Patch 1724999 by Ali Gholami Rudi -- avoid complaints about dict size change during iter in destroy call. ................
2007-07-11 21:09:30 +08:00
/* DictComp node: file-scope pointer to its type object (not assigned in this
   part of the file) and the `key` identifier declared via _Py_IDENTIFIER. */
static PyTypeObject *DictComp_type;
_Py_IDENTIFIER(key);
Merged revisions 56154-56264 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r56155 | neal.norwitz | 2007-07-03 08:59:08 +0300 (Tue, 03 Jul 2007) | 1 line Get this test working after converting map to return an iterator ................ r56202 | neal.norwitz | 2007-07-09 04:30:09 +0300 (Mon, 09 Jul 2007) | 37 lines Merged revisions 56124-56201 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r56129 | georg.brandl | 2007-06-30 04:01:01 -0700 (Sat, 30 Jun 2007) | 2 lines Document smtp.SMTPAuthenticationError. ........ r56137 | georg.brandl | 2007-07-01 01:11:35 -0700 (Sun, 01 Jul 2007) | 2 lines Fix a few webbrowser.py problems. ........ r56143 | georg.brandl | 2007-07-02 04:54:28 -0700 (Mon, 02 Jul 2007) | 2 lines Remove duplicate sentence from alarm() doc. ........ r56170 | mark.hammond | 2007-07-03 19:03:10 -0700 (Tue, 03 Jul 2007) | 3 lines copy built files to the PCBuild directory, where tools like distutils or external build processes can find them. ........ r56176 | kurt.kaiser | 2007-07-05 15:03:39 -0700 (Thu, 05 Jul 2007) | 10 lines Many calls to tk.call involve an arglist containing a single tuple. Calls using METH_OLDARGS unpack this tuple; calls using METH_VARARG don't. Tcl's concatenation of args was affected; IDLE doesn't start. Modify Tkapp_Call() to unpack single tuple arglists. Bug 1733943 Ref http://mail.python.org/pipermail/python-checkins/2007-May/060454.html ........ r56177 | neal.norwitz | 2007-07-05 21:13:39 -0700 (Thu, 05 Jul 2007) | 1 line Fix typo in comment ........ ................ r56251 | neal.norwitz | 2007-07-11 10:01:01 +0300 (Wed, 11 Jul 2007) | 1 line Get working with map returning an iterator (had to fix whitespace too) ................ 
r56255 | thomas.wouters | 2007-07-11 13:41:37 +0300 (Wed, 11 Jul 2007) | 6 lines Clean up merge glitch or copy-paste error (the entire module was duplicated, except the first half even had some more copy-paste errors, referring to listcomps and genexps instead of setcomps) ................ r56256 | thomas.wouters | 2007-07-11 15:16:01 +0300 (Wed, 11 Jul 2007) | 14 lines Dict comprehensions. Still needs doc changes (like many python-3000 features ;-). It generates bytecode similar to: x = {} for k, v in (generator here): x[k] = v except there is no tuple-packing and -unpacking involved. Trivial measurement suggests it's significantly faster than dict(generator here) (in the order of 2 to 3 times as fast) but I have not done extensive measurements. ................ r56263 | guido.van.rossum | 2007-07-11 15:36:26 +0300 (Wed, 11 Jul 2007) | 3 lines Patch 1724999 by Ali Gholami Rudi -- avoid complaints about dict size change during iter in destroy call. ................
2007-07-11 21:09:30 +08:00
/* Field names of the DictComp node, in declaration order:
   DictComp(key, value, generators). */
static char *DictComp_fields[]={
    "key",
    "value",
    "generators",
};
/* Declarations for a run of expression node types.  Each node gets a
   file-scope `<Node>_type` pointer (not assigned in this part of the file)
   and a `<Node>_fields` array listing its field names in order.
   `_Py_IDENTIFIER(x)` lines declare identifiers that are referenced as
   `PyId_x` (compare `PyId__fields` in ast_type_init). */

/* GeneratorExp(elt, generators) */
static PyTypeObject *GeneratorExp_type;
static char *GeneratorExp_fields[]={
"elt",
"generators",
};
/* Await(value) */
static PyTypeObject *Await_type;
static char *Await_fields[]={
"value",
};
/* Yield(value) */
static PyTypeObject *Yield_type;
static char *Yield_fields[]={
"value",
};
/* YieldFrom(value) */
static PyTypeObject *YieldFrom_type;
static char *YieldFrom_fields[]={
"value",
};
/* Compare(left, ops, comparators) */
static PyTypeObject *Compare_type;
_Py_IDENTIFIER(ops);
_Py_IDENTIFIER(comparators);
static char *Compare_fields[]={
"left",
"ops",
"comparators",
};
/* Call(func, args, keywords) */
static PyTypeObject *Call_type;
_Py_IDENTIFIER(func);
static char *Call_fields[]={
"func",
"args",
"keywords",
};
/* FormattedValue(value, conversion, format_spec) */
static PyTypeObject *FormattedValue_type;
_Py_IDENTIFIER(conversion);
_Py_IDENTIFIER(format_spec);
static char *FormattedValue_fields[]={
"value",
"conversion",
"format_spec",
};
/* JoinedStr(values) */
static PyTypeObject *JoinedStr_type;
static char *JoinedStr_fields[]={
"values",
};
/* Constant(value) */
static PyTypeObject *Constant_type;
static char *Constant_fields[]={
"value",
};
/* Attribute(value, attr, ctx) */
static PyTypeObject *Attribute_type;
_Py_IDENTIFIER(attr);
_Py_IDENTIFIER(ctx);
static char *Attribute_fields[]={
"value",
"attr",
"ctx",
};
/* Subscript(value, slice, ctx) */
static PyTypeObject *Subscript_type;
_Py_IDENTIFIER(slice);
static char *Subscript_fields[]={
"value",
"slice",
"ctx",
};
Merged revisions 55225-55227,55229-55269 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r55238 | guido.van.rossum | 2007-05-10 16:46:05 -0700 (Thu, 10 May 2007) | 9 lines Merged revisions 55227 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r55227 | guido.van.rossum | 2007-05-10 10:20:15 -0700 (Thu, 10 May 2007) | 2 lines Fix a bug in test_c_api() that caused a negative refcount. ........ ................ r55246 | neal.norwitz | 2007-05-11 00:01:52 -0700 (Fri, 11 May 2007) | 1 line Remove commands.getstatus() it is obsolete. ................ r55248 | neal.norwitz | 2007-05-11 00:29:05 -0700 (Fri, 11 May 2007) | 2 lines Remove bsddb185 support. ................ r55249 | neal.norwitz | 2007-05-11 00:29:50 -0700 (Fri, 11 May 2007) | 1 line Remove bsddb185 module too ................ r55250 | neal.norwitz | 2007-05-11 00:32:13 -0700 (Fri, 11 May 2007) | 1 line bsddb185: Gotta remove from the file checked in, not Setup ................ r55251 | neal.norwitz | 2007-05-11 00:53:26 -0700 (Fri, 11 May 2007) | 1 line Remove obsolete IRIX modules (as much as I could find, there is probably more) ................ r55252 | neal.norwitz | 2007-05-11 00:55:35 -0700 (Fri, 11 May 2007) | 1 line Remove SGI turd. ................ r55254 | georg.brandl | 2007-05-11 03:11:01 -0700 (Fri, 11 May 2007) | 2 lines Add a case for set comprehensions to the "cannot assign to" switch. ................ r55255 | georg.brandl | 2007-05-11 03:11:25 -0700 (Fri, 11 May 2007) | 2 lines Fix wrong imports. ................ r55261 | georg.brandl | 2007-05-11 07:37:48 -0700 (Fri, 11 May 2007) | 2 lines Remove removed tex files. ................ r55262 | georg.brandl | 2007-05-11 08:28:41 -0700 (Fri, 11 May 2007) | 2 lines Commit PEP 3132 implementation. ................ r55264 | georg.brandl | 2007-05-11 08:50:19 -0700 (Fri, 11 May 2007) | 2 lines Check in the inevitable AST version number and format Py_ssize_t with %zd. 
................ r55265 | neal.norwitz | 2007-05-11 09:12:22 -0700 (Fri, 11 May 2007) | 1 line Remove mention of os.popen* and popen2.* since these will be removed. ................ r55266 | neal.norwitz | 2007-05-11 09:19:57 -0700 (Fri, 11 May 2007) | 1 line Get doc to build again (almost, the doc is fine) ................ r55267 | neal.norwitz | 2007-05-11 09:21:02 -0700 (Fri, 11 May 2007) | 1 line Really get doc to build (remove use of string module) ................ r55269 | neal.norwitz | 2007-05-11 09:29:43 -0700 (Fri, 11 May 2007) | 1 line Add some notes to cleanup later ................
2007-05-12 00:50:42 +08:00
/* Starred node: file-scope pointer to its type object (not assigned in this
   part of the file). */
static PyTypeObject *Starred_type;
/* Field names of the Starred node, in declaration order:
   Starred(value, ctx). */
static char *Starred_fields[]={
    "value",
    "ctx",
};
/* Type-object pointers and field-name tables for the Name, List and Tuple
   expression nodes.  All three list `ctx` as their last field. */

/* Name(id, ctx) */
static PyTypeObject *Name_type;
_Py_IDENTIFIER(id);
static char *Name_fields[]={
"id",
"ctx",
};
/* List(elts, ctx) */
static PyTypeObject *List_type;
static char *List_fields[]={
"elts",
"ctx",
};
/* Tuple(elts, ctx) */
static PyTypeObject *Tuple_type;
static char *Tuple_fields[]={
"elts",
"ctx",
};
/* expr_context: a base type pointer, one `*_singleton` object pointer and one
   `*_type` pointer per variant (Load, Store, Del, AugLoad, AugStore, Param).
   NOTE(review): the singletons are presumably the shared instance of each
   variant -- confirm where they are initialized (outside this chunk). */
static PyTypeObject *expr_context_type;
static PyObject *Load_singleton, *Store_singleton, *Del_singleton,
*AugLoad_singleton, *AugStore_singleton, *Param_singleton;
/* Forward declaration; the definition is not in this part of the file. */
static PyObject* ast2obj_expr_context(expr_context_ty);
static PyTypeObject *Load_type;
static PyTypeObject *Store_type;
static PyTypeObject *Del_type;
static PyTypeObject *AugLoad_type;
static PyTypeObject *AugStore_type;
static PyTypeObject *Param_type;
/* slice: base type pointer, converter forward declaration, and the three
   concrete variants Slice, ExtSlice and Index with their field-name tables. */
static PyTypeObject *slice_type;
/* Forward declaration; takes an untyped pointer.  Defined outside this chunk. */
static PyObject* ast2obj_slice(void*);
/* Slice(lower, upper, step) */
static PyTypeObject *Slice_type;
_Py_IDENTIFIER(lower);
_Py_IDENTIFIER(upper);
_Py_IDENTIFIER(step);
static char *Slice_fields[]={
"lower",
"upper",
"step",
};
/* ExtSlice(dims) */
static PyTypeObject *ExtSlice_type;
_Py_IDENTIFIER(dims);
static char *ExtSlice_fields[]={
"dims",
};
/* Index(value) */
static PyTypeObject *Index_type;
static char *Index_fields[]={
"value",
};
/* Operator families: boolop, operator, unaryop and cmpop.  Each family
   declares a base type pointer, one `*_singleton` object pointer per variant,
   a forward declaration of its ast2obj_* converter (defined outside this
   chunk), and one `*_type` pointer per variant.  None of these variants has a
   `*_fields` table here. */

/* boolop: And, Or */
static PyTypeObject *boolop_type;
static PyObject *And_singleton, *Or_singleton;
static PyObject* ast2obj_boolop(boolop_ty);
static PyTypeObject *And_type;
static PyTypeObject *Or_type;
/* operator: the thirteen binary operators */
static PyTypeObject *operator_type;
static PyObject *Add_singleton, *Sub_singleton, *Mult_singleton,
*MatMult_singleton, *Div_singleton, *Mod_singleton, *Pow_singleton,
*LShift_singleton, *RShift_singleton, *BitOr_singleton, *BitXor_singleton,
*BitAnd_singleton, *FloorDiv_singleton;
static PyObject* ast2obj_operator(operator_ty);
static PyTypeObject *Add_type;
static PyTypeObject *Sub_type;
static PyTypeObject *Mult_type;
static PyTypeObject *MatMult_type;
static PyTypeObject *Div_type;
static PyTypeObject *Mod_type;
static PyTypeObject *Pow_type;
static PyTypeObject *LShift_type;
static PyTypeObject *RShift_type;
static PyTypeObject *BitOr_type;
static PyTypeObject *BitXor_type;
static PyTypeObject *BitAnd_type;
static PyTypeObject *FloorDiv_type;
/* unaryop: Invert, Not, UAdd, USub */
static PyTypeObject *unaryop_type;
static PyObject *Invert_singleton, *Not_singleton, *UAdd_singleton,
*USub_singleton;
static PyObject* ast2obj_unaryop(unaryop_ty);
static PyTypeObject *Invert_type;
static PyTypeObject *Not_type;
static PyTypeObject *UAdd_type;
static PyTypeObject *USub_type;
/* cmpop: the ten comparison operators */
static PyTypeObject *cmpop_type;
static PyObject *Eq_singleton, *NotEq_singleton, *Lt_singleton, *LtE_singleton,
*Gt_singleton, *GtE_singleton, *Is_singleton, *IsNot_singleton, *In_singleton,
*NotIn_singleton;
static PyObject* ast2obj_cmpop(cmpop_ty);
static PyTypeObject *Eq_type;
static PyTypeObject *NotEq_type;
static PyTypeObject *Lt_type;
static PyTypeObject *LtE_type;
static PyTypeObject *Gt_type;
static PyTypeObject *GtE_type;
static PyTypeObject *Is_type;
static PyTypeObject *IsNot_type;
static PyTypeObject *In_type;
static PyTypeObject *NotIn_type;
/* comprehension(target, iter, ifs, is_async): type pointer, converter forward
   declaration (defined outside this chunk), identifiers and field-name table. */
static PyTypeObject *comprehension_type;
static PyObject* ast2obj_comprehension(void*);
_Py_IDENTIFIER(ifs);
_Py_IDENTIFIER(is_async);
static char *comprehension_fields[]={
"target",
"iter",
"ifs",
"is_async",
};
/* Base type pointer for exception handlers; its attribute table follows. */
static PyTypeObject *excepthandler_type;
/* Attribute names of excepthandler nodes: the start position followed by
   the end position. */
static char *excepthandler_attributes[] = {
    "lineno",
    "col_offset",
    "end_lineno",
    "end_col_offset",
};
/* Declarations for ExceptHandler, arguments and arg.  The ast2obj_* lines are
   forward declarations taking an untyped pointer; their definitions are not
   in this part of the file. */
static PyObject* ast2obj_excepthandler(void*);
/* ExceptHandler(type, name, body) */
static PyTypeObject *ExceptHandler_type;
_Py_IDENTIFIER(type);
static char *ExceptHandler_fields[]={
"type",
"name",
"body",
};
/* arguments(args, vararg, kwonlyargs, kw_defaults, kwarg, defaults) */
static PyTypeObject *arguments_type;
static PyObject* ast2obj_arguments(void*);
_Py_IDENTIFIER(vararg);
_Py_IDENTIFIER(kwonlyargs);
_Py_IDENTIFIER(kw_defaults);
_Py_IDENTIFIER(kwarg);
_Py_IDENTIFIER(defaults);
static char *arguments_fields[]={
"args",
"vararg",
"kwonlyargs",
"kw_defaults",
"kwarg",
"defaults",
};
/* arg: type pointer and converter; its attribute and field tables follow. */
static PyTypeObject *arg_type;
static PyObject* ast2obj_arg(void*);
/* Attribute names of arg nodes: the start position followed by the end
   position. */
static char *arg_attributes[] = {
    "lineno",
    "col_offset",
    "end_lineno",
    "end_col_offset",
};
/* Field tables for arg, keyword, alias, withitem and TypeIgnore, with their
   type pointers and converter forward declarations (the ast2obj_* functions
   are defined outside this chunk). */

/* arg(arg, annotation, type_comment) */
_Py_IDENTIFIER(arg);
static char *arg_fields[]={
"arg",
"annotation",
"type_comment",
};
/* keyword(arg, value) */
static PyTypeObject *keyword_type;
static PyObject* ast2obj_keyword(void*);
static char *keyword_fields[]={
"arg",
"value",
};
/* alias(name, asname) */
static PyTypeObject *alias_type;
static PyObject* ast2obj_alias(void*);
_Py_IDENTIFIER(asname);
static char *alias_fields[]={
"name",
"asname",
};
/* withitem(context_expr, optional_vars) */
static PyTypeObject *withitem_type;
static PyObject* ast2obj_withitem(void*);
_Py_IDENTIFIER(context_expr);
_Py_IDENTIFIER(optional_vars);
static char *withitem_fields[]={
"context_expr",
"optional_vars",
};
/* type_ignore base type and its TypeIgnore(lineno) variant.  Here `lineno` is
   listed as a field, not in an attributes table. */
static PyTypeObject *type_ignore_type;
static PyObject* ast2obj_type_ignore(void*);
static PyTypeObject *TypeIgnore_type;
static char *TypeIgnore_fields[]={
"lineno",
};
/* Identifiers for the `_fields` and `_attributes` class attributes;
   `PyId__fields` is looked up on the instance's type in ast_type_init. */
_Py_IDENTIFIER(_fields);
_Py_IDENTIFIER(_attributes);
2012-03-13 00:46:44 +08:00
typedef struct {
PyObject_HEAD
2012-03-13 00:46:44 +08:00
PyObject *dict;
} AST_object;
2012-03-15 10:50:29 +08:00
/* Deallocator installed as tp_dealloc of AST_type.
 *
 * Removes the object from GC tracking first, then drops the reference to the
 * instance dict, and finally releases the object's memory through the
 * tp_free slot of its type.
 */
static void
ast_dealloc(AST_object *self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_CLEAR(self->dict);
    Py_TYPE(self)->tp_free(self);
}
/* GC traversal hook (installed as tp_traverse in AST_type).  The only
   reference the object holds is its instance dict, so that is the only
   member passed to Py_VISIT.  Py_VISIT relies on the parameter names `visit`
   and `arg`.  Returns 0 when the macro does not return early. */
static int
ast_traverse(AST_object *self, visitproc visit, void *arg)
{
Py_VISIT(self->dict);
return 0;
}
/* GC clear hook (installed as tp_clear in AST_type): drops the reference to
   the instance dict via Py_CLEAR.  Always returns 0. */
static int
ast_clear(AST_object *self)
{
Py_CLEAR(self->dict);
return 0;
}
/* tp_init for AST node types.
 *
 * Positional arguments are bound, in order, to the names listed in the
 * `_fields` attribute of the instance's type; keyword arguments are then set
 * as attributes under their own names.
 *
 * A missing `_fields` attribute counts as zero fields.  Passing more
 * positional arguments than there are fields raises TypeError.
 *
 * Returns 0 on success and -1 (with an exception set) on failure.
 */
static int
ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
{
    PyObject *fields = NULL;
    Py_ssize_t numfields = 0;
    Py_ssize_t nargs, pos;
    int status = -1;

    if (_PyObject_LookupAttrId((PyObject*)Py_TYPE(self), &PyId__fields, &fields) < 0) {
        goto done;
    }
    if (fields != NULL) {
        numfields = PySequence_Size(fields);
        if (numfields == -1) {
            goto done;
        }
    }

    nargs = PyTuple_GET_SIZE(args);
    if (nargs > numfields) {
        /* status is still -1 here */
        PyErr_Format(PyExc_TypeError, "%.400s constructor takes at most "
                     "%zd positional argument%s",
                     Py_TYPE(self)->tp_name,
                     numfields, numfields == 1 ? "" : "s");
        goto done;
    }

    /* From here on the result is 0 unless an attribute assignment fails. */
    status = 0;

    /* Not entered when fields is NULL: numfields is 0 then, so nargs is 0. */
    for (pos = 0; pos < nargs; pos++) {
        PyObject *name = PySequence_GetItem(fields, pos);
        if (name == NULL) {
            status = -1;
            goto done;
        }
        status = PyObject_SetAttr(self, name, PyTuple_GET_ITEM(args, pos));
        Py_DECREF(name);
        if (status < 0) {
            goto done;
        }
    }

    if (kw != NULL) {
        PyObject *key, *value;
        pos = 0; /* PyDict_Next requires the position to start at 0 */
        while (PyDict_Next(kw, &pos, &key, &value)) {
            status = PyObject_SetAttr(self, key, value);
            if (status < 0) {
                goto done;
            }
        }
    }

done:
    Py_XDECREF(fields);
    return status;
}
Merged revisions 61440-61441,61443,61445-61448,61451-61452,61455-61457,61459-61464,61466-61467,61469-61470,61476-61477,61479,61481-61482,61485,61487,61490,61493-61494,61497,61499-61502,61505-61506,61508,61511-61514,61519,61521-61522,61530-61531,61533-61537,61541-61555,61557-61558,61561-61562,61566-61569,61572-61574,61578-61579,61583-61584,61588-61589,61592,61594,61598-61601,61603-61604,61607-61612,61617,61619-61620,61624,61626,61628-61630,61635-61638,61640-61643,61645,61648,61653-61655,61659-61662,61664,61666,61668-61671,61673,61675,61679-61680,61682,61685-61686,61689-61695,61697-61699,61701-61703,61706,61710,61713,61717,61723,61726-61730,61736,61738,61740,61742,61745-61752,61754-61760,61762-61764,61768,61770-61772,61774-61775,61784-61787,61789-61792,61794-61795,61797-61806,61808-61809,61811-61812,61814-61819,61824,61826-61833,61835-61840,61843-61845,61848,61850,61854-61862,61865-61866,61868,61872-61873,61876-61877,61883-61888,61890-61891,61893-61899,61901-61903,61905-61912,61914,61917,61920-61921,61927,61930,61932-61934,61939,61941-61942,61944-61951,61955,61960-61963,61980,61982-61983,61991,61994-61996,62001-62003,62008-62010,62016-62017,62022,62024,62027,62031-62034,62041,62045-62046,62048,62050-62051,62055-62066,62068-62074,62076-62078 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r62048 | georg.brandl | 2008-03-29 23:53:55 -0700 (Sat, 29 Mar 2008) | 2 lines Adapt test_ast to the new ExceptHandler type. ........ r62050 | georg.brandl | 2008-03-30 00:09:22 -0700 (Sun, 30 Mar 2008) | 2 lines Convert test_ast to unittest and add a test for r62049. ........ r62051 | georg.brandl | 2008-03-30 12:00:49 -0700 (Sun, 30 Mar 2008) | 2 lines Make _fields attr for no fields consistent with _attributes attr. ........ r62059 | georg.brandl | 2008-03-30 13:20:39 -0700 (Sun, 30 Mar 2008) | 2 lines Make AST nodes pickleable. ........
2008-03-31 13:29:39 +08:00
/* Pickling support */
/* __reduce__ implementation for AST nodes (pickling support).
 *
 * Looks up the instance's `__dict__`.  When one is found the result is the
 * tuple (type, (), dict); otherwise it is (type, ()).  Returns NULL with an
 * exception set if the lookup fails.  The second parameter is unused.
 */
static PyObject *
ast_type_reduce(PyObject *self, PyObject *unused)
{
    _Py_IDENTIFIER(__dict__);
    PyObject *dict;
    if (_PyObject_LookupAttrId(self, &PyId___dict__, &dict) < 0) {
        return NULL;
    }
    if (dict) {
        /* "N" hands our reference to dict over to the new tuple. */
        return Py_BuildValue("O()N", Py_TYPE(self), dict);
    }
    return Py_BuildValue("O()", Py_TYPE(self));
}
/* Method table for AST_type (its tp_methods slot).  The single entry is
   `__reduce__`, registered as METH_NOARGS with no docstring. */
static PyMethodDef ast_type_methods[] = {
{"__reduce__", ast_type_reduce, METH_NOARGS, NULL},
/* sentinel */
{NULL}
};
2012-03-13 00:46:44 +08:00
/* Getset table for AST_type (its tp_getset slot): exposes `__dict__` through
   the generic dict getter and setter. */
static PyGetSetDef ast_type_getsets[] = {
{"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict},
/* sentinel */
{NULL}
};
static PyTypeObject AST_type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
Merged revisions 61440-61441,61443,61445-61448,61451-61452,61455-61457,61459-61464,61466-61467,61469-61470,61476-61477,61479,61481-61482,61485,61487,61490,61493-61494,61497,61499-61502,61505-61506,61508,61511-61514,61519,61521-61522,61530-61531,61533-61537,61541-61555,61557-61558,61561-61562,61566-61569,61572-61574,61578-61579,61583-61584,61588-61589,61592,61594,61598-61601,61603-61604,61607-61612,61617,61619-61620,61624,61626,61628-61630,61635-61638,61640-61643,61645,61648,61653-61655,61659-61662,61664,61666,61668-61671,61673,61675,61679-61680,61682,61685-61686,61689-61695,61697-61699,61701-61703,61706,61710,61713,61717,61723,61726-61730,61736,61738,61740,61742,61745-61752,61754-61760,61762-61764,61768,61770-61772,61774-61775,61784-61787,61789-61792,61794-61795,61797-61806,61808-61809,61811-61812,61814-61819,61824,61826-61833,61835-61840,61843-61845,61848,61850,61854-61862,61865-61866,61868,61872-61873,61876-61877,61883-61888,61890-61891,61893-61899,61901-61903,61905-61912,61914,61917,61920-61921,61927,61930,61932-61934,61939,61941-61942,61944-61951,61955,61960-61963,61980,61982-61983,61991,61994-61996,62001-62003,62008-62010,62016-62017,62022,62024,62027,62031-62034,62041,62045-62046,62048,62050-62051,62055-62066,62068-62074,62076-62078 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r62048 | georg.brandl | 2008-03-29 23:53:55 -0700 (Sat, 29 Mar 2008) | 2 lines Adapt test_ast to the new ExceptHandler type. ........ r62050 | georg.brandl | 2008-03-30 00:09:22 -0700 (Sun, 30 Mar 2008) | 2 lines Convert test_ast to unittest and add a test for r62049. ........ r62051 | georg.brandl | 2008-03-30 12:00:49 -0700 (Sun, 30 Mar 2008) | 2 lines Make _fields attr for no fields consistent with _attributes attr. ........ r62059 | georg.brandl | 2008-03-30 13:20:39 -0700 (Sun, 30 Mar 2008) | 2 lines Make AST nodes pickleable. ........
2008-03-31 13:29:39 +08:00
"_ast.AST",
2012-03-13 00:46:44 +08:00
sizeof(AST_object),
0,
2012-03-15 10:50:29 +08:00
(destructor)ast_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
PyObject_GenericSetAttr, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
0, /* tp_doc */
(traverseproc)ast_traverse, /* tp_traverse */
(inquiry)ast_clear, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Merged revisions 61440-61441,61443,61445-61448,61451-61452,61455-61457,61459-61464,61466-61467,61469-61470,61476-61477,61479,61481-61482,61485,61487,61490,61493-61494,61497,61499-61502,61505-61506,61508,61511-61514,61519,61521-61522,61530-61531,61533-61537,61541-61555,61557-61558,61561-61562,61566-61569,61572-61574,61578-61579,61583-61584,61588-61589,61592,61594,61598-61601,61603-61604,61607-61612,61617,61619-61620,61624,61626,61628-61630,61635-61638,61640-61643,61645,61648,61653-61655,61659-61662,61664,61666,61668-61671,61673,61675,61679-61680,61682,61685-61686,61689-61695,61697-61699,61701-61703,61706,61710,61713,61717,61723,61726-61730,61736,61738,61740,61742,61745-61752,61754-61760,61762-61764,61768,61770-61772,61774-61775,61784-61787,61789-61792,61794-61795,61797-61806,61808-61809,61811-61812,61814-61819,61824,61826-61833,61835-61840,61843-61845,61848,61850,61854-61862,61865-61866,61868,61872-61873,61876-61877,61883-61888,61890-61891,61893-61899,61901-61903,61905-61912,61914,61917,61920-61921,61927,61930,61932-61934,61939,61941-61942,61944-61951,61955,61960-61963,61980,61982-61983,61991,61994-61996,62001-62003,62008-62010,62016-62017,62022,62024,62027,62031-62034,62041,62045-62046,62048,62050-62051,62055-62066,62068-62074,62076-62078 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r62048 | georg.brandl | 2008-03-29 23:53:55 -0700 (Sat, 29 Mar 2008) | 2 lines Adapt test_ast to the new ExceptHandler type. ........ r62050 | georg.brandl | 2008-03-30 00:09:22 -0700 (Sun, 30 Mar 2008) | 2 lines Convert test_ast to unittest and add a test for r62049. ........ r62051 | georg.brandl | 2008-03-30 12:00:49 -0700 (Sun, 30 Mar 2008) | 2 lines Make _fields attr for no fields consistent with _attributes attr. ........ r62059 | georg.brandl | 2008-03-30 13:20:39 -0700 (Sun, 30 Mar 2008) | 2 lines Make AST nodes pickleable. ........
2008-03-31 13:29:39 +08:00
ast_type_methods, /* tp_methods */
0, /* tp_members */
2012-03-13 00:46:44 +08:00
ast_type_getsets, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
2012-03-13 00:46:44 +08:00
offsetof(AST_object, dict),/* tp_dictoffset */
(initproc)ast_type_init, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
PyType_GenericNew, /* tp_new */
PyObject_GC_Del, /* tp_free */
};
/* Build a new AST node class called `type` that derives from `base`.
 *
 * The class is produced by calling the `type` metaclass with the name, a
 * one-element bases tuple, and a namespace dict that maps the `_fields`
 * identifier to a tuple of the `num_fields` names in `fields` and the
 * `__module__` identifier to the `_ast` identifier string.
 *
 * Returns the new class, or NULL if any step fails.
 */
static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields)
{
    _Py_IDENTIFIER(__module__);
    _Py_IDENTIFIER(_ast);
    PyObject *field_names;
    PyObject *new_class;
    int idx = 0;

    field_names = PyTuple_New(num_fields);
    if (field_names == NULL) {
        return NULL;
    }

    /* Fill the tuple with one str object per field name. */
    while (idx < num_fields) {
        PyObject *name = PyUnicode_FromString(fields[idx]);
        if (name == NULL) {
            Py_DECREF(field_names);
            return NULL;
        }
        /* PyTuple_SET_ITEM takes over the reference to `name`. */
        PyTuple_SET_ITEM(field_names, idx, name);
        idx++;
    }

    /* Equivalent of: type(name, (base,), {_fields: ..., __module__: ...}) */
    new_class = PyObject_CallFunction(
        (PyObject*)&PyType_Type, "s(O){OOOO}",
        type,
        base,
        _PyUnicode_FromId(&PyId__fields), field_names,
        _PyUnicode_FromId(&PyId___module__), _PyUnicode_FromId(&PyId__ast));

    /* The namespace dict holds its own reference to the tuple. */
    Py_DECREF(field_names);
    return (PyTypeObject*)new_class;
}
/* Store a tuple of the `num_fields` names in `attrs` on `type` under the
 * `_attributes` identifier.
 *
 * Returns 1 on success and 0 on failure (note: not the 0/-1 convention).
 */
static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
{
    PyObject *names;
    int idx;
    int ok;

    names = PyTuple_New(num_fields);
    if (names == NULL) {
        return 0;
    }

    idx = 0;
    while (idx < num_fields) {
        PyObject *attr_name = PyUnicode_FromString(attrs[idx]);
        if (attr_name == NULL) {
            Py_DECREF(names);
            return 0;
        }
        /* PyTuple_SET_ITEM takes over the reference to `attr_name`. */
        PyTuple_SET_ITEM(names, idx, attr_name);
        idx++;
    }

    if (_PyObject_SetAttrId((PyObject*)type, &PyId__attributes, names) < 0) {
        ok = 0;
    }
    else {
        ok = 1;
    }
    Py_DECREF(names);
    return ok;
}
/* Conversion AST -> Python */
/* Convert an asdl sequence into a Python list by applying `func` to each
 * element in order.
 *
 * Returns the new list, or NULL if the list cannot be created or if `func`
 * returns NULL for any element (the partially filled list is released).
 */
static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*))
{
    Py_ssize_t length = asdl_seq_LEN(seq);
    Py_ssize_t pos = 0;
    PyObject *list = PyList_New(length);

    if (list == NULL) {
        return NULL;
    }
    while (pos < length) {
        PyObject *item = func(asdl_seq_GET(seq, pos));
        if (item == NULL) {
            Py_DECREF(list);
            return NULL;
        }
        /* PyList_SET_ITEM takes over the reference to `item`. */
        PyList_SET_ITEM(list, pos, item);
        pos++;
    }
    return list;
}
static PyObject* ast2obj_object(void *o)
{
if (!o)
o = Py_None;
Py_INCREF((PyObject*)o);
return (PyObject*)o;
}
/* These value kinds are all converted with ast2obj_object, so each
 * converter name is simply an alias for it. */
#define ast2obj_singleton ast2obj_object
#define ast2obj_constant ast2obj_object
#define ast2obj_identifier ast2obj_object
#define ast2obj_string ast2obj_object
#define ast2obj_bytes ast2obj_object
/* Wrap the C long `b` in a Python int object (NULL on failure). */
static PyObject* ast2obj_int(long b)
{
    PyObject *number = PyLong_FromLong(b);

    return number;
}
/* Conversion Python -> AST */
/* Store `obj` in `*out`, registering it with `arena` first.
 *
 * Py_None and NULL are both stored as NULL without touching the arena.
 * Returns 0 on success; on arena failure sets `*out` to NULL and returns -1.
 */
static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
{
    /* "No value" is represented by NULL on the AST side. */
    if (obj == NULL || obj == Py_None) {
        *out = NULL;
        return 0;
    }

    if (PyArena_AddPyObject(arena, obj) < 0) {
        *out = NULL;
        return -1;
    }
    /* NOTE(review): presumably balances the reference the arena releases
     * when it is freed -- confirm against PyArena_AddPyObject. */
    Py_INCREF(obj);
    *out = obj;
    return 0;
}
/* Store `obj` in `*out` after registering it with `arena`.
 *
 * Unlike obj2ast_object, Py_None is kept as-is rather than mapped to NULL.
 * Returns 0 on success; on arena failure sets `*out` to NULL and returns -1.
 */
static int obj2ast_constant(PyObject* obj, PyObject** out, PyArena* arena)
{
    int status = PyArena_AddPyObject(arena, obj);

    if (status < 0) {
        *out = NULL;
        return -1;
    }
    Py_INCREF(obj);
    *out = obj;
    return 0;
}
/* Convert an identifier: `obj` must be exactly a str, or Py_None.
 *
 * Accepted values are handed to obj2ast_object and its result is returned.
 * Anything else raises TypeError and returns 1.
 */
static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
{
    if (PyUnicode_CheckExact(obj) || obj == Py_None) {
        return obj2ast_object(obj, out, arena);
    }
    PyErr_SetString(PyExc_TypeError, "AST identifier must be of type str");
    return 1;
}
/* Convert a string value: `obj` must be exactly a str or exactly a bytes
 * object.
 *
 * Accepted values are handed to obj2ast_object and its result is returned.
 * Anything else raises TypeError and returns 1.
 */
static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
{
    if (PyUnicode_CheckExact(obj) || PyBytes_CheckExact(obj)) {
        return obj2ast_object(obj, out, arena);
    }
    PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
    return 1;
}
/* Convert a Python int into a C int stored in `*out`.
 *
 * Returns 0 on success. Returns 1 with an exception set when `obj` is not
 * an int (ValueError) or when the conversion to a C int fails. `arena` is
 * not used.
 */
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
{
    int value;

    if (PyLong_Check(obj)) {
        value = _PyLong_AsInt(obj);
        /* -1 is only an error if an exception is actually pending. */
        if (value == -1 && PyErr_Occurred()) {
            return 1;
        }
        *out = value;
        return 0;
    }

    PyErr_Format(PyExc_ValueError, "invalid integer value: %R", obj);
    return 1;
}
Merged revisions 66974,66977,66984,66989,66992,66994-66996,66998-67000,67007,67015 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r66974 | benjamin.peterson | 2008-10-19 08:59:01 -0500 (Sun, 19 Oct 2008) | 1 line fix compiler warning ........ r66977 | benjamin.peterson | 2008-10-19 14:39:16 -0500 (Sun, 19 Oct 2008) | 1 line mention -n ........ r66984 | armin.ronacher | 2008-10-20 16:29:08 -0500 (Mon, 20 Oct 2008) | 3 lines Fixed #4062, added import for _ast.__version__ to ast to match the documented behavior. ........ r66989 | matthias.klose | 2008-10-21 04:12:25 -0500 (Tue, 21 Oct 2008) | 2 lines - install versioned manpage ........ r66992 | benjamin.peterson | 2008-10-21 15:51:13 -0500 (Tue, 21 Oct 2008) | 1 line make sure to call iteritems() ........ r66994 | amaury.forgeotdarc | 2008-10-21 17:01:38 -0500 (Tue, 21 Oct 2008) | 6 lines #4157 move two test functions out of platform.py. Turn them into unit tests, and correct an obvious typo: (("a", "b") ("c", "d") ("e", "f")) compiles even with the missing commas, but does not execute very well... ........ r66995 | benjamin.peterson | 2008-10-21 17:18:29 -0500 (Tue, 21 Oct 2008) | 1 line return ArgInfo from inspect.getargvalues #4092 ........ r66996 | benjamin.peterson | 2008-10-21 17:20:31 -0500 (Tue, 21 Oct 2008) | 1 line add NEWs note for last change ........ r66998 | benjamin.peterson | 2008-10-22 15:57:43 -0500 (Wed, 22 Oct 2008) | 1 line fix a few typos ........ r66999 | benjamin.peterson | 2008-10-22 16:05:30 -0500 (Wed, 22 Oct 2008) | 1 line and another typo... ........ r67000 | benjamin.peterson | 2008-10-22 16:16:34 -0500 (Wed, 22 Oct 2008) | 1 line fix #4150: pdb's up command didn't work for generators in post-mortem ........ r67007 | benjamin.peterson | 2008-10-23 16:43:48 -0500 (Thu, 23 Oct 2008) | 1 line only nonempty __slots__ don't work ........ r67015 | georg.brandl | 2008-10-25 02:00:52 -0500 (Sat, 25 Oct 2008) | 2 lines Typo fix. ........
2008-10-25 23:49:17 +08:00
static int add_ast_fields(void)
2008-10-25 07:11:02 +08:00
{
PyObject *empty_tuple, *d;
if (PyType_Ready(&AST_type) < 0)
return -1;
d = AST_type.tp_dict;
empty_tuple = PyTuple_New(0);
if (!empty_tuple ||
_PyDict_SetItemId(d, &PyId__fields, empty_tuple) < 0 ||
_PyDict_SetItemId(d, &PyId__attributes, empty_tuple) < 0) {
2008-10-25 07:11:02 +08:00
Py_XDECREF(empty_tuple);
return -1;
}
Py_DECREF(empty_tuple);
return 0;
}
static int init_types(void)
{
static int initialized;
if (initialized) return 1;
if (add_ast_fields() < 0) return 0;
mod_type = make_type("mod", &AST_type, NULL, 0);
if (!mod_type) return 0;
if (!add_attributes(mod_type, NULL, 0)) return 0;
Module_type = make_type("Module", mod_type, Module_fields, 2);
if (!Module_type) return 0;
Interactive_type = make_type("Interactive", mod_type, Interactive_fields,
1);
if (!Interactive_type) return 0;
Expression_type = make_type("Expression", mod_type, Expression_fields, 1);
if (!Expression_type) return 0;
FunctionType_type = make_type("FunctionType", mod_type,
FunctionType_fields, 2);
if (!FunctionType_type) return 0;
Suite_type = make_type("Suite", mod_type, Suite_fields, 1);
if (!Suite_type) return 0;
stmt_type = make_type("stmt", &AST_type, NULL, 0);
if (!stmt_type) return 0;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
if (!add_attributes(stmt_type, stmt_attributes, 4)) return 0;
FunctionDef_type = make_type("FunctionDef", stmt_type, FunctionDef_fields,
6);
if (!FunctionDef_type) return 0;
AsyncFunctionDef_type = make_type("AsyncFunctionDef", stmt_type,
AsyncFunctionDef_fields, 6);
if (!AsyncFunctionDef_type) return 0;
ClassDef_type = make_type("ClassDef", stmt_type, ClassDef_fields, 5);
if (!ClassDef_type) return 0;
Return_type = make_type("Return", stmt_type, Return_fields, 1);
if (!Return_type) return 0;
Delete_type = make_type("Delete", stmt_type, Delete_fields, 1);
if (!Delete_type) return 0;
Assign_type = make_type("Assign", stmt_type, Assign_fields, 3);
if (!Assign_type) return 0;
AugAssign_type = make_type("AugAssign", stmt_type, AugAssign_fields, 3);
if (!AugAssign_type) return 0;
AnnAssign_type = make_type("AnnAssign", stmt_type, AnnAssign_fields, 4);
if (!AnnAssign_type) return 0;
For_type = make_type("For", stmt_type, For_fields, 5);
if (!For_type) return 0;
AsyncFor_type = make_type("AsyncFor", stmt_type, AsyncFor_fields, 5);
if (!AsyncFor_type) return 0;
While_type = make_type("While", stmt_type, While_fields, 3);
if (!While_type) return 0;
If_type = make_type("If", stmt_type, If_fields, 3);
if (!If_type) return 0;
With_type = make_type("With", stmt_type, With_fields, 3);
if (!With_type) return 0;
AsyncWith_type = make_type("AsyncWith", stmt_type, AsyncWith_fields, 3);
if (!AsyncWith_type) return 0;
Raise_type = make_type("Raise", stmt_type, Raise_fields, 2);
if (!Raise_type) return 0;
Try_type = make_type("Try", stmt_type, Try_fields, 4);
if (!Try_type) return 0;
Assert_type = make_type("Assert", stmt_type, Assert_fields, 2);
if (!Assert_type) return 0;
Import_type = make_type("Import", stmt_type, Import_fields, 1);
if (!Import_type) return 0;
ImportFrom_type = make_type("ImportFrom", stmt_type, ImportFrom_fields, 3);
if (!ImportFrom_type) return 0;
Global_type = make_type("Global", stmt_type, Global_fields, 1);
if (!Global_type) return 0;
Nonlocal_type = make_type("Nonlocal", stmt_type, Nonlocal_fields, 1);
if (!Nonlocal_type) return 0;
Expr_type = make_type("Expr", stmt_type, Expr_fields, 1);
if (!Expr_type) return 0;
Pass_type = make_type("Pass", stmt_type, NULL, 0);
if (!Pass_type) return 0;
Break_type = make_type("Break", stmt_type, NULL, 0);
if (!Break_type) return 0;
Continue_type = make_type("Continue", stmt_type, NULL, 0);
if (!Continue_type) return 0;
expr_type = make_type("expr", &AST_type, NULL, 0);
if (!expr_type) return 0;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
if (!add_attributes(expr_type, expr_attributes, 4)) return 0;
BoolOp_type = make_type("BoolOp", expr_type, BoolOp_fields, 2);
if (!BoolOp_type) return 0;
bpo-35224: PEP 572 Implementation (#10497) * Add tokenization of := - Add token to Include/token.h. Add token to documentation in Doc/library/token.rst. - Run `./python Lib/token.py` to regenerate Lib/token.py. - Update Parser/tokenizer.c: add case to handle `:=`. * Add initial usage of := in grammar. * Update Python.asdl to match the grammar updates. Regenerated Include/Python-ast.h and Python/Python-ast.c * Update AST and compiler files in Python/ast.c and Python/compile.c. Basic functionality, this isn't scoped properly * Regenerate Lib/symbol.py using `./python Lib/symbol.py` * Tests - Fix failing tests in test_parser.py due to changes in token numbers for internal representation * Tests - Add simple test for := token * Tests - Add simple tests for named expressions using expr and suite * Tests - Update number of levels for nested expressions to prevent stack overflow * Update symbol table to handle NamedExpr * Update Grammar to allow assignment expressions in if statements. Regenerate Python/graminit.c accordingly using `make regen-grammar` * Tests - Add additional tests for named expressions in RoundtripLegalSyntaxTestCase, based on examples and information directly from PEP 572 Note: failing tests are currently commented out (4 out of 24 tests currently fail) * Tests - Add temporary syntax test failure tests in test_parser.py Note: There is an outstanding TODO for this -- syntax tests need to be moved to a different file (presumably test_syntax.py), but this is covering what needs to be tested at the moment, and it's more convenient to run a single test for the time being * Add support for allowing assignment expressions as function argument annotations. Uncomment tests for these cases because they all pass now! * Tests - Move existing syntax tests out of test_parser.py and into test_named_expressions.py. 
Refactor syntax tests to use unittest * Add TargetScopeError exception to extend SyntaxError Note: This simply creates the TargetScopeError exception, it is not yet used anywhere * Tests - Update tests per PEP 572 Continue refactoring test suite: The named expression test suite now checks for any invalid cases that throw exceptions (no longer limited to SyntaxErrors), assignment tests to ensure that variables are properly assigned, and scope tests to ensure that variable availability and values are correct Note: - There are still tests that are marked to skip, as they are not yet implemented - There are approximately 300 lines of the PEP that have not yet been addressed, though these may be deferred * Documentation - Small updates to XXX/todo comments - Remove XXX from child description in ast.c - Add comment with number of previously supported nested expressions for 3.7.X in test_parser.py * Fix assert in seq_for_testlist() * Cleanup - Denote "Not implemented -- No keyword args" on failing test case. Fix PEP8 error for blank lines at beginning of test classes in test_parser.py * Tests - Wrap all file opens in `with...as` to ensure files are closed * WIP: handle f(a := 1) * Tests and Cleanup - No longer skips keyword arg test. Keyword arg test now uses a simpler test case and does not rely on an external file. Remove print statements from ast.c * Tests - Refactor last remaining test case that relied on on external file to use a simpler test case without the dependency * Tests - Add better description of remaning skipped tests. Add test checking scope when using assignment expression in a function argument * Tests - Add test for nested comprehension, testing value and scope. Fix variable name in skipped comprehension scope test * Handle restriction of LHS for named expressions - can only assign to LHS of type NAME. Specifically, restrict assignment to tuples This adds an alternative set_context specifically for named expressions, set_namedexpr_context. 
Thus, context is now set differently for standard assignment versus assignment for named expressions in order to handle restrictions. * Tests - Update negative test case for assigning to lambda to match new error message. Add negative test case for assigning to tuple * Tests - Reorder test cases to group invalid syntax cases and named assignment target errors * Tests - Update test case for named expression in function argument - check that result and variable are set correctly * Todo - Add todo for TargetScopeError based on Guido's comment (https://github.com/python/cpython/commit/2b3acd37bdfc2d35e5094228c6684050d2aa8b0a#r30472562) * Tests - Add named expression tests for assignment operator in function arguments Note: One of two tests are skipped, as function arguments are currently treating an assignment expression inside of parenthesis as one child, which does not properly catch the named expression, nor does it count arguments properly * Add NamedStore to expr_context. Regenerate related code with `make regen-ast` * Add usage of NamedStore to ast_for_named_expr in ast.c. Update occurances of checking for Store to also handle NamedStore where appropriate * Add ste_comprehension to _symtable_entry to track if the namespace is a comprehension. Initialize ste_comprehension to 0. Set set_comprehension to 1 in symtable_handle_comprehension * s/symtable_add_def/symtable_add_def_helper. Add symtable_add_def to handle grabbing st->st_cur and passing it to symtable_add_def_helper. This now allows us to call the original code from symtable_add_def by instead calling symtable_add_def_helper with a different ste. * Refactor symtable_record_directive to take lineno and col_offset as arguments instead of stmt_ty. This allows symtable_record_directive to be used for stmt_ty and expr_ty * Handle elevating scope for named expressions in comprehensions. * Handle error for usage of named expression inside a class block * Tests - No longer skip scope tests. 
Add additional scope tests * Cleanup - Update error message for named expression within a comprehension within a class. Update comments. Add assert for symtable_extend_namedexpr_scope to validate that we always find at least a ModuleScope if we don't find a Class or FunctionScope * Cleanup - Add missing case for NamedStore in expr_context_name. Remove unused var in set_namedexpr_content * Refactor - Consolidate set_context and set_namedexpr_context to reduce duplicated code. Special cases for named expressions are handled by checking if ctx is NamedStore * Cleanup - Add additional use cases for ast_for_namedexpr in usage comment. Fix multiple blank lines in test_named_expressions * Tests - Remove unnecessary test case. Renumber test case function names * Remove TargetScopeError for now. Will add back if needed * Cleanup - Small comment nit for consistency * Handle positional argument check with named expression * Add TargetScopeError exception definition. Add documentation for TargetScopeError in c-api docs. Throw TargetScopeError instead of SyntaxError when using a named expression in a comprehension within a class scope * Increase stack size for parser by 200. This is a minimal change (approx. 5kb) and should not have an impact on any systems. 
Update parser test to allow 99 nested levels again * Add TargetScopeError to exception_hierarchy.txt for test_baseexception.py_ * Tests - Major update for named expression tests, both in test_named_expressions and test_parser - Add test for TargetScopeError - Add tests for named expressions in comprehension scope and edge cases - Add tests for named expressions in function arguments (declarations and call sites) - Reorganize tests to group them more logically * Cleanup - Remove unnecessary comment * Cleanup - Comment nitpicks * Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0) - Add check for LHS types to detect a parenthesis then a name (see note) - Add test for this scenario - Update tests for changed error message for named assignment to a tuple (also, see note) Note: This caused issues with the previous error handling for named assignment to a LHS that contained an expression, such as a tuple. Thus, the check for the LHS of a named expression must be changed to be more specific if we wish to maintain the previous error messages * Cleanup - Wrap lines more strictly in test file * Revert "Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0)" This reverts commit f1531400ca7d7a2d148830c8ac703f041740896d. * Add NEWS.d entry * Tests - Fix error in test_pickle.test_exceptions by adding TargetScopeError to list of exceptions * Tests - Update error message tests to reflect improved messaging convention (s/can't/cannot) * Remove cases that cannot be reached in compile.c. Small linting update. * Update Grammar/Tokens to add COLONEQUAL. Regenerate all files * Update TargetScopeError PRE_INIT and POST_INIT, as this was purposefully left out when fixing rebase conflicts * Add NamedStore back and regenerate files * Pass along line number and end col info for named expression * Simplify News entry * Fix compiler warning and explicity mark fallthrough
2019-01-25 07:49:56 +08:00
NamedExpr_type = make_type("NamedExpr", expr_type, NamedExpr_fields, 2);
if (!NamedExpr_type) return 0;
BinOp_type = make_type("BinOp", expr_type, BinOp_fields, 3);
if (!BinOp_type) return 0;
UnaryOp_type = make_type("UnaryOp", expr_type, UnaryOp_fields, 2);
if (!UnaryOp_type) return 0;
Lambda_type = make_type("Lambda", expr_type, Lambda_fields, 2);
if (!Lambda_type) return 0;
IfExp_type = make_type("IfExp", expr_type, IfExp_fields, 3);
if (!IfExp_type) return 0;
Dict_type = make_type("Dict", expr_type, Dict_fields, 2);
if (!Dict_type) return 0;
Set_type = make_type("Set", expr_type, Set_fields, 1);
if (!Set_type) return 0;
ListComp_type = make_type("ListComp", expr_type, ListComp_fields, 2);
if (!ListComp_type) return 0;
SetComp_type = make_type("SetComp", expr_type, SetComp_fields, 2);
if (!SetComp_type) return 0;
DictComp_type = make_type("DictComp", expr_type, DictComp_fields, 3);
if (!DictComp_type) return 0;
GeneratorExp_type = make_type("GeneratorExp", expr_type,
GeneratorExp_fields, 2);
if (!GeneratorExp_type) return 0;
Await_type = make_type("Await", expr_type, Await_fields, 1);
if (!Await_type) return 0;
Yield_type = make_type("Yield", expr_type, Yield_fields, 1);
if (!Yield_type) return 0;
YieldFrom_type = make_type("YieldFrom", expr_type, YieldFrom_fields, 1);
if (!YieldFrom_type) return 0;
Compare_type = make_type("Compare", expr_type, Compare_fields, 3);
if (!Compare_type) return 0;
Call_type = make_type("Call", expr_type, Call_fields, 3);
if (!Call_type) return 0;
FormattedValue_type = make_type("FormattedValue", expr_type,
FormattedValue_fields, 3);
if (!FormattedValue_type) return 0;
JoinedStr_type = make_type("JoinedStr", expr_type, JoinedStr_fields, 1);
if (!JoinedStr_type) return 0;
Constant_type = make_type("Constant", expr_type, Constant_fields, 1);
if (!Constant_type) return 0;
Attribute_type = make_type("Attribute", expr_type, Attribute_fields, 3);
if (!Attribute_type) return 0;
Subscript_type = make_type("Subscript", expr_type, Subscript_fields, 3);
if (!Subscript_type) return 0;
Starred_type = make_type("Starred", expr_type, Starred_fields, 2);
if (!Starred_type) return 0;
Name_type = make_type("Name", expr_type, Name_fields, 2);
if (!Name_type) return 0;
List_type = make_type("List", expr_type, List_fields, 2);
if (!List_type) return 0;
Tuple_type = make_type("Tuple", expr_type, Tuple_fields, 2);
if (!Tuple_type) return 0;
expr_context_type = make_type("expr_context", &AST_type, NULL, 0);
if (!expr_context_type) return 0;
if (!add_attributes(expr_context_type, NULL, 0)) return 0;
Load_type = make_type("Load", expr_context_type, NULL, 0);
if (!Load_type) return 0;
Load_singleton = PyType_GenericNew(Load_type, NULL, NULL);
if (!Load_singleton) return 0;
Store_type = make_type("Store", expr_context_type, NULL, 0);
if (!Store_type) return 0;
Store_singleton = PyType_GenericNew(Store_type, NULL, NULL);
if (!Store_singleton) return 0;
Del_type = make_type("Del", expr_context_type, NULL, 0);
if (!Del_type) return 0;
Del_singleton = PyType_GenericNew(Del_type, NULL, NULL);
if (!Del_singleton) return 0;
AugLoad_type = make_type("AugLoad", expr_context_type, NULL, 0);
if (!AugLoad_type) return 0;
AugLoad_singleton = PyType_GenericNew(AugLoad_type, NULL, NULL);
if (!AugLoad_singleton) return 0;
AugStore_type = make_type("AugStore", expr_context_type, NULL, 0);
if (!AugStore_type) return 0;
AugStore_singleton = PyType_GenericNew(AugStore_type, NULL, NULL);
if (!AugStore_singleton) return 0;
Param_type = make_type("Param", expr_context_type, NULL, 0);
if (!Param_type) return 0;
Param_singleton = PyType_GenericNew(Param_type, NULL, NULL);
if (!Param_singleton) return 0;
slice_type = make_type("slice", &AST_type, NULL, 0);
if (!slice_type) return 0;
if (!add_attributes(slice_type, NULL, 0)) return 0;
Slice_type = make_type("Slice", slice_type, Slice_fields, 3);
if (!Slice_type) return 0;
ExtSlice_type = make_type("ExtSlice", slice_type, ExtSlice_fields, 1);
if (!ExtSlice_type) return 0;
Index_type = make_type("Index", slice_type, Index_fields, 1);
if (!Index_type) return 0;
boolop_type = make_type("boolop", &AST_type, NULL, 0);
if (!boolop_type) return 0;
if (!add_attributes(boolop_type, NULL, 0)) return 0;
And_type = make_type("And", boolop_type, NULL, 0);
if (!And_type) return 0;
And_singleton = PyType_GenericNew(And_type, NULL, NULL);
if (!And_singleton) return 0;
Or_type = make_type("Or", boolop_type, NULL, 0);
if (!Or_type) return 0;
Or_singleton = PyType_GenericNew(Or_type, NULL, NULL);
if (!Or_singleton) return 0;
operator_type = make_type("operator", &AST_type, NULL, 0);
if (!operator_type) return 0;
if (!add_attributes(operator_type, NULL, 0)) return 0;
Add_type = make_type("Add", operator_type, NULL, 0);
if (!Add_type) return 0;
Add_singleton = PyType_GenericNew(Add_type, NULL, NULL);
if (!Add_singleton) return 0;
Sub_type = make_type("Sub", operator_type, NULL, 0);
if (!Sub_type) return 0;
Sub_singleton = PyType_GenericNew(Sub_type, NULL, NULL);
if (!Sub_singleton) return 0;
Mult_type = make_type("Mult", operator_type, NULL, 0);
if (!Mult_type) return 0;
Mult_singleton = PyType_GenericNew(Mult_type, NULL, NULL);
if (!Mult_singleton) return 0;
MatMult_type = make_type("MatMult", operator_type, NULL, 0);
if (!MatMult_type) return 0;
MatMult_singleton = PyType_GenericNew(MatMult_type, NULL, NULL);
if (!MatMult_singleton) return 0;
Div_type = make_type("Div", operator_type, NULL, 0);
if (!Div_type) return 0;
Div_singleton = PyType_GenericNew(Div_type, NULL, NULL);
if (!Div_singleton) return 0;
Mod_type = make_type("Mod", operator_type, NULL, 0);
if (!Mod_type) return 0;
Mod_singleton = PyType_GenericNew(Mod_type, NULL, NULL);
if (!Mod_singleton) return 0;
Pow_type = make_type("Pow", operator_type, NULL, 0);
if (!Pow_type) return 0;
Pow_singleton = PyType_GenericNew(Pow_type, NULL, NULL);
if (!Pow_singleton) return 0;
LShift_type = make_type("LShift", operator_type, NULL, 0);
if (!LShift_type) return 0;
LShift_singleton = PyType_GenericNew(LShift_type, NULL, NULL);
if (!LShift_singleton) return 0;
RShift_type = make_type("RShift", operator_type, NULL, 0);
if (!RShift_type) return 0;
RShift_singleton = PyType_GenericNew(RShift_type, NULL, NULL);
if (!RShift_singleton) return 0;
BitOr_type = make_type("BitOr", operator_type, NULL, 0);
if (!BitOr_type) return 0;
BitOr_singleton = PyType_GenericNew(BitOr_type, NULL, NULL);
if (!BitOr_singleton) return 0;
BitXor_type = make_type("BitXor", operator_type, NULL, 0);
if (!BitXor_type) return 0;
BitXor_singleton = PyType_GenericNew(BitXor_type, NULL, NULL);
if (!BitXor_singleton) return 0;
BitAnd_type = make_type("BitAnd", operator_type, NULL, 0);
if (!BitAnd_type) return 0;
BitAnd_singleton = PyType_GenericNew(BitAnd_type, NULL, NULL);
if (!BitAnd_singleton) return 0;
FloorDiv_type = make_type("FloorDiv", operator_type, NULL, 0);
if (!FloorDiv_type) return 0;
FloorDiv_singleton = PyType_GenericNew(FloorDiv_type, NULL, NULL);
if (!FloorDiv_singleton) return 0;
unaryop_type = make_type("unaryop", &AST_type, NULL, 0);
if (!unaryop_type) return 0;
if (!add_attributes(unaryop_type, NULL, 0)) return 0;
Invert_type = make_type("Invert", unaryop_type, NULL, 0);
if (!Invert_type) return 0;
Invert_singleton = PyType_GenericNew(Invert_type, NULL, NULL);
if (!Invert_singleton) return 0;
Not_type = make_type("Not", unaryop_type, NULL, 0);
if (!Not_type) return 0;
Not_singleton = PyType_GenericNew(Not_type, NULL, NULL);
if (!Not_singleton) return 0;
UAdd_type = make_type("UAdd", unaryop_type, NULL, 0);
if (!UAdd_type) return 0;
UAdd_singleton = PyType_GenericNew(UAdd_type, NULL, NULL);
if (!UAdd_singleton) return 0;
USub_type = make_type("USub", unaryop_type, NULL, 0);
if (!USub_type) return 0;
USub_singleton = PyType_GenericNew(USub_type, NULL, NULL);
if (!USub_singleton) return 0;
cmpop_type = make_type("cmpop", &AST_type, NULL, 0);
if (!cmpop_type) return 0;
if (!add_attributes(cmpop_type, NULL, 0)) return 0;
Eq_type = make_type("Eq", cmpop_type, NULL, 0);
if (!Eq_type) return 0;
Eq_singleton = PyType_GenericNew(Eq_type, NULL, NULL);
if (!Eq_singleton) return 0;
NotEq_type = make_type("NotEq", cmpop_type, NULL, 0);
if (!NotEq_type) return 0;
NotEq_singleton = PyType_GenericNew(NotEq_type, NULL, NULL);
if (!NotEq_singleton) return 0;
Lt_type = make_type("Lt", cmpop_type, NULL, 0);
if (!Lt_type) return 0;
Lt_singleton = PyType_GenericNew(Lt_type, NULL, NULL);
if (!Lt_singleton) return 0;
LtE_type = make_type("LtE", cmpop_type, NULL, 0);
if (!LtE_type) return 0;
LtE_singleton = PyType_GenericNew(LtE_type, NULL, NULL);
if (!LtE_singleton) return 0;
Gt_type = make_type("Gt", cmpop_type, NULL, 0);
if (!Gt_type) return 0;
Gt_singleton = PyType_GenericNew(Gt_type, NULL, NULL);
if (!Gt_singleton) return 0;
GtE_type = make_type("GtE", cmpop_type, NULL, 0);
if (!GtE_type) return 0;
GtE_singleton = PyType_GenericNew(GtE_type, NULL, NULL);
if (!GtE_singleton) return 0;
Is_type = make_type("Is", cmpop_type, NULL, 0);
if (!Is_type) return 0;
Is_singleton = PyType_GenericNew(Is_type, NULL, NULL);
if (!Is_singleton) return 0;
IsNot_type = make_type("IsNot", cmpop_type, NULL, 0);
if (!IsNot_type) return 0;
IsNot_singleton = PyType_GenericNew(IsNot_type, NULL, NULL);
if (!IsNot_singleton) return 0;
In_type = make_type("In", cmpop_type, NULL, 0);
if (!In_type) return 0;
In_singleton = PyType_GenericNew(In_type, NULL, NULL);
if (!In_singleton) return 0;
NotIn_type = make_type("NotIn", cmpop_type, NULL, 0);
if (!NotIn_type) return 0;
NotIn_singleton = PyType_GenericNew(NotIn_type, NULL, NULL);
if (!NotIn_singleton) return 0;
comprehension_type = make_type("comprehension", &AST_type,
comprehension_fields, 4);
if (!comprehension_type) return 0;
if (!add_attributes(comprehension_type, NULL, 0)) return 0;
excepthandler_type = make_type("excepthandler", &AST_type, NULL, 0);
if (!excepthandler_type) return 0;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
if (!add_attributes(excepthandler_type, excepthandler_attributes, 4))
return 0;
ExceptHandler_type = make_type("ExceptHandler", excepthandler_type,
ExceptHandler_fields, 3);
if (!ExceptHandler_type) return 0;
arguments_type = make_type("arguments", &AST_type, arguments_fields, 6);
if (!arguments_type) return 0;
if (!add_attributes(arguments_type, NULL, 0)) return 0;
arg_type = make_type("arg", &AST_type, arg_fields, 3);
if (!arg_type) return 0;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
if (!add_attributes(arg_type, arg_attributes, 4)) return 0;
keyword_type = make_type("keyword", &AST_type, keyword_fields, 2);
if (!keyword_type) return 0;
if (!add_attributes(keyword_type, NULL, 0)) return 0;
alias_type = make_type("alias", &AST_type, alias_fields, 2);
if (!alias_type) return 0;
if (!add_attributes(alias_type, NULL, 0)) return 0;
withitem_type = make_type("withitem", &AST_type, withitem_fields, 2);
if (!withitem_type) return 0;
if (!add_attributes(withitem_type, NULL, 0)) return 0;
type_ignore_type = make_type("type_ignore", &AST_type, NULL, 0);
if (!type_ignore_type) return 0;
if (!add_attributes(type_ignore_type, NULL, 0)) return 0;
TypeIgnore_type = make_type("TypeIgnore", type_ignore_type,
TypeIgnore_fields, 1);
if (!TypeIgnore_type) return 0;
initialized = 1;
return 1;
}
/* Prototypes for the file-local obj2ast_* functions.  Each one takes a
   PyObject, an out-parameter of the matching *_ty type, and a PyArena,
   and returns an int. */
static int obj2ast_mod(PyObject *obj, mod_ty *out, PyArena *arena);
static int obj2ast_stmt(PyObject *obj, stmt_ty *out, PyArena *arena);
static int obj2ast_expr(PyObject *obj, expr_ty *out, PyArena *arena);
static int obj2ast_expr_context(PyObject *obj, expr_context_ty *out,
                                PyArena *arena);
static int obj2ast_slice(PyObject *obj, slice_ty *out, PyArena *arena);
static int obj2ast_boolop(PyObject *obj, boolop_ty *out, PyArena *arena);
static int obj2ast_operator(PyObject *obj, operator_ty *out, PyArena *arena);
static int obj2ast_unaryop(PyObject *obj, unaryop_ty *out, PyArena *arena);
static int obj2ast_cmpop(PyObject *obj, cmpop_ty *out, PyArena *arena);
static int obj2ast_comprehension(PyObject *obj, comprehension_ty *out,
                                 PyArena *arena);
static int obj2ast_excepthandler(PyObject *obj, excepthandler_ty *out,
                                 PyArena *arena);
static int obj2ast_arguments(PyObject *obj, arguments_ty *out, PyArena *arena);
static int obj2ast_arg(PyObject *obj, arg_ty *out, PyArena *arena);
static int obj2ast_keyword(PyObject *obj, keyword_ty *out, PyArena *arena);
static int obj2ast_alias(PyObject *obj, alias_ty *out, PyArena *arena);
static int obj2ast_withitem(PyObject *obj, withitem_ty *out, PyArena *arena);
static int obj2ast_type_ignore(PyObject *obj, type_ignore_ty *out,
                               PyArena *arena);
/* Build a Module node.
 *
 * The node is allocated from `arena`; `body` and `type_ignores` are stored
 * as given without any validation.  Returns NULL if PyArena_Malloc fails.
 */
mod_ty
Module(asdl_seq *body, asdl_seq *type_ignores, PyArena *arena)
{
    mod_ty node = (mod_ty)PyArena_Malloc(arena, sizeof(*node));

    if (node == NULL) {
        return NULL;
    }
    node->kind = Module_kind;
    node->v.Module.body = body;
    node->v.Module.type_ignores = type_ignores;
    return node;
}
/* Build an Interactive node.
 *
 * The node is allocated from `arena` and `body` is stored as given.
 * Returns NULL if PyArena_Malloc fails.
 */
mod_ty
Interactive(asdl_seq *body, PyArena *arena)
{
    mod_ty node = (mod_ty)PyArena_Malloc(arena, sizeof(*node));

    if (node == NULL) {
        return NULL;
    }
    node->kind = Interactive_kind;
    node->v.Interactive.body = body;
    return node;
}
/* Build an Expression node.
 *
 * `body` is mandatory: when it is missing a ValueError is set and NULL is
 * returned before anything is allocated.  Otherwise the node is allocated
 * from `arena`; NULL is also returned if PyArena_Malloc fails.
 */
mod_ty
Expression(expr_ty body, PyArena *arena)
{
    mod_ty node;

    if (!body) {
        PyErr_SetString(PyExc_ValueError,
                        "field body is required for Expression");
        return NULL;
    }

    node = (mod_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }
    node->kind = Expression_kind;
    node->v.Expression.body = body;
    return node;
}
/* Build a FunctionType node.
 *
 * `returns` is mandatory: when it is missing a ValueError is set and NULL
 * is returned before anything is allocated.  `argtypes` is stored as given.
 * NULL is also returned if PyArena_Malloc fails.
 */
mod_ty
FunctionType(asdl_seq *argtypes, expr_ty returns, PyArena *arena)
{
    mod_ty node;

    if (!returns) {
        PyErr_SetString(PyExc_ValueError,
                        "field returns is required for FunctionType");
        return NULL;
    }

    node = (mod_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }
    node->kind = FunctionType_kind;
    node->v.FunctionType.argtypes = argtypes;
    node->v.FunctionType.returns = returns;
    return node;
}
/* Build a Suite node.
 *
 * The node is allocated from `arena` and `body` is stored as given.
 * Returns NULL if PyArena_Malloc fails.
 */
mod_ty
Suite(asdl_seq *body, PyArena *arena)
{
    mod_ty node = (mod_ty)PyArena_Malloc(arena, sizeof(*node));

    if (node == NULL) {
        return NULL;
    }
    node->kind = Suite_kind;
    node->v.Suite.body = body;
    return node;
}
/* Build a FunctionDef statement node.
 *
 * `name` and `args` are mandatory: if either is missing a ValueError is set
 * and NULL is returned before anything is allocated.  `body`,
 * `decorator_list`, `returns` and `type_comment` are stored as given.
 * The four position arguments are copied into the node's lineno,
 * col_offset, end_lineno and end_col_offset members.
 *
 * The node is allocated from `arena`; NULL is returned if PyArena_Malloc
 * fails.
 */
stmt_ty
FunctionDef(identifier name, arguments_ty args, asdl_seq *body,
            asdl_seq *decorator_list, expr_ty returns, string type_comment,
            int lineno, int col_offset, int end_lineno, int end_col_offset,
            PyArena *arena)
{
    stmt_ty node;

    if (!name) {
        PyErr_SetString(PyExc_ValueError,
                        "field name is required for FunctionDef");
        return NULL;
    }
    if (!args) {
        PyErr_SetString(PyExc_ValueError,
                        "field args is required for FunctionDef");
        return NULL;
    }

    node = (stmt_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }

    node->kind = FunctionDef_kind;
    node->v.FunctionDef.name = name;
    node->v.FunctionDef.args = args;
    node->v.FunctionDef.body = body;
    node->v.FunctionDef.decorator_list = decorator_list;
    node->v.FunctionDef.returns = returns;
    node->v.FunctionDef.type_comment = type_comment;

    /* Source span of the statement. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;
    return node;
}
/* Build an AsyncFunctionDef statement node.
 *
 * `name` and `args` are mandatory: if either is missing a ValueError is set
 * and NULL is returned before anything is allocated.  `body`,
 * `decorator_list`, `returns` and `type_comment` are stored as given.
 * The four position arguments are copied into the node's lineno,
 * col_offset, end_lineno and end_col_offset members.
 *
 * The node is allocated from `arena`; NULL is returned if PyArena_Malloc
 * fails.
 */
stmt_ty
AsyncFunctionDef(identifier name, arguments_ty args, asdl_seq *body,
                 asdl_seq *decorator_list, expr_ty returns,
                 string type_comment, int lineno, int col_offset,
                 int end_lineno, int end_col_offset, PyArena *arena)
{
    stmt_ty node;

    if (!name) {
        PyErr_SetString(PyExc_ValueError,
                        "field name is required for AsyncFunctionDef");
        return NULL;
    }
    if (!args) {
        PyErr_SetString(PyExc_ValueError,
                        "field args is required for AsyncFunctionDef");
        return NULL;
    }

    node = (stmt_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }

    node->kind = AsyncFunctionDef_kind;
    node->v.AsyncFunctionDef.name = name;
    node->v.AsyncFunctionDef.args = args;
    node->v.AsyncFunctionDef.body = body;
    node->v.AsyncFunctionDef.decorator_list = decorator_list;
    node->v.AsyncFunctionDef.returns = returns;
    node->v.AsyncFunctionDef.type_comment = type_comment;

    /* Source span of the statement. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;
    return node;
}
stmt_ty
ClassDef(identifier name, asdl_seq * bases, asdl_seq * keywords, asdl_seq *
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
body, asdl_seq * decorator_list, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
stmt_ty p;
if (!name) {
PyErr_SetString(PyExc_ValueError,
"field name is required for ClassDef");
return NULL;
}
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = ClassDef_kind;
p->v.ClassDef.name = name;
p->v.ClassDef.bases = bases;
p->v.ClassDef.keywords = keywords;
p->v.ClassDef.body = body;
p->v.ClassDef.decorator_list = decorator_list;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Return(expr_ty value, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Return_kind;
p->v.Return.value = value;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Delete(asdl_seq * targets, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Delete_kind;
p->v.Delete.targets = targets;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
/* Build an Assign statement node.
 *
 * `value` is mandatory: if it is missing a ValueError is set and NULL is
 * returned before anything is allocated.  `targets` and `type_comment` are
 * stored as given.  The four position arguments are copied into the node's
 * lineno, col_offset, end_lineno and end_col_offset members.
 *
 * The node is allocated from `arena`; NULL is returned if PyArena_Malloc
 * fails.
 */
stmt_ty
Assign(asdl_seq *targets, expr_ty value, string type_comment, int lineno,
       int col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
    stmt_ty node;

    if (!value) {
        PyErr_SetString(PyExc_ValueError,
                        "field value is required for Assign");
        return NULL;
    }

    node = (stmt_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }

    node->kind = Assign_kind;
    node->v.Assign.targets = targets;
    node->v.Assign.value = value;
    node->v.Assign.type_comment = type_comment;

    /* Source span of the statement. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;
    return node;
}
stmt_ty
AugAssign(expr_ty target, operator_ty op, expr_ty value, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
stmt_ty p;
if (!target) {
PyErr_SetString(PyExc_ValueError,
"field target is required for AugAssign");
return NULL;
}
if (!op) {
PyErr_SetString(PyExc_ValueError,
"field op is required for AugAssign");
return NULL;
}
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for AugAssign");
return NULL;
}
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = AugAssign_kind;
p->v.AugAssign.target = target;
p->v.AugAssign.op = op;
p->v.AugAssign.value = value;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
AnnAssign(expr_ty target, expr_ty annotation, expr_ty value, int simple, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
lineno, int col_offset, int end_lineno, int end_col_offset, PyArena
*arena)
{
stmt_ty p;
if (!target) {
PyErr_SetString(PyExc_ValueError,
"field target is required for AnnAssign");
return NULL;
}
if (!annotation) {
PyErr_SetString(PyExc_ValueError,
"field annotation is required for AnnAssign");
return NULL;
}
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = AnnAssign_kind;
p->v.AnnAssign.target = target;
p->v.AnnAssign.annotation = annotation;
p->v.AnnAssign.value = value;
p->v.AnnAssign.simple = simple;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
/*
 * Build a For statement node inside `arena`.
 *
 * `target` and `iter` are mandatory: when either is NULL a ValueError is
 * set and NULL is returned (target is checked first).  `body`, `orelse`
 * and `type_comment` are stored exactly as passed, without validation.
 * NULL is also returned when PyArena_Malloc() yields no memory.
 */
stmt_ty
For(expr_ty target, expr_ty iter, asdl_seq * body, asdl_seq * orelse, string
    type_comment, int lineno, int col_offset, int end_lineno, int
    end_col_offset, PyArena *arena)
{
    stmt_ty node;

    if (target == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "field target is required for For");
        return NULL;
    }
    if (iter == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "field iter is required for For");
        return NULL;
    }

    node = (stmt_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }

    /* Source span covered by the statement. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;

    /* Variant tag and its payload. */
    node->kind = For_kind;
    node->v.For.target = target;
    node->v.For.iter = iter;
    node->v.For.body = body;
    node->v.For.orelse = orelse;
    node->v.For.type_comment = type_comment;

    return node;
}
/*
 * Build an AsyncFor statement node inside `arena`.
 *
 * `target` and `iter` are mandatory: when either is NULL a ValueError is
 * set and NULL is returned (target is checked first).  `body`, `orelse`
 * and `type_comment` are stored exactly as passed, without validation.
 * NULL is also returned when PyArena_Malloc() yields no memory.
 */
stmt_ty
AsyncFor(expr_ty target, expr_ty iter, asdl_seq * body, asdl_seq * orelse,
         string type_comment, int lineno, int col_offset, int end_lineno, int
         end_col_offset, PyArena *arena)
{
    stmt_ty node;

    if (target == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "field target is required for AsyncFor");
        return NULL;
    }
    if (iter == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "field iter is required for AsyncFor");
        return NULL;
    }

    node = (stmt_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }

    /* Source span covered by the statement. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;

    /* Variant tag and its payload. */
    node->kind = AsyncFor_kind;
    node->v.AsyncFor.target = target;
    node->v.AsyncFor.iter = iter;
    node->v.AsyncFor.body = body;
    node->v.AsyncFor.orelse = orelse;
    node->v.AsyncFor.type_comment = type_comment;

    return node;
}
stmt_ty
While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
stmt_ty p;
if (!test) {
PyErr_SetString(PyExc_ValueError,
"field test is required for While");
return NULL;
}
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = While_kind;
p->v.While.test = test;
p->v.While.body = body;
p->v.While.orelse = orelse;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
stmt_ty p;
if (!test) {
PyErr_SetString(PyExc_ValueError,
"field test is required for If");
return NULL;
}
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = If_kind;
p->v.If.test = test;
p->v.If.body = body;
p->v.If.orelse = orelse;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
/*
 * Build a With statement node inside `arena`.
 *
 * None of the arguments is validated here; `items`, `body` and
 * `type_comment` are stored exactly as passed.  Returns NULL when
 * PyArena_Malloc() yields no memory.
 */
stmt_ty
With(asdl_seq * items, asdl_seq * body, string type_comment, int lineno, int
     col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
    stmt_ty node = (stmt_ty)PyArena_Malloc(arena, sizeof(*node));

    if (node == NULL) {
        return NULL;
    }

    /* Source span covered by the statement. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;

    /* Variant tag and its payload. */
    node->kind = With_kind;
    node->v.With.items = items;
    node->v.With.body = body;
    node->v.With.type_comment = type_comment;

    return node;
}
/*
 * Build an AsyncWith statement node inside `arena`.
 *
 * None of the arguments is validated here; `items`, `body` and
 * `type_comment` are stored exactly as passed.  Returns NULL when
 * PyArena_Malloc() yields no memory.
 */
stmt_ty
AsyncWith(asdl_seq * items, asdl_seq * body, string type_comment, int lineno,
          int col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
    stmt_ty node = (stmt_ty)PyArena_Malloc(arena, sizeof(*node));

    if (node == NULL) {
        return NULL;
    }

    /* Source span covered by the statement. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;

    /* Variant tag and its payload. */
    node->kind = AsyncWith_kind;
    node->v.AsyncWith.items = items;
    node->v.AsyncWith.body = body;
    node->v.AsyncWith.type_comment = type_comment;

    return node;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Raise(expr_ty exc, expr_ty cause, int lineno, int col_offset, int end_lineno,
int end_col_offset, PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Raise_kind;
p->v.Raise.exc = exc;
p->v.Raise.cause = cause;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
Try(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse, asdl_seq *
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
finalbody, int lineno, int col_offset, int end_lineno, int end_col_offset,
PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Try_kind;
p->v.Try.body = body;
p->v.Try.handlers = handlers;
p->v.Try.orelse = orelse;
p->v.Try.finalbody = finalbody;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Assert(expr_ty test, expr_ty msg, int lineno, int col_offset, int end_lineno,
int end_col_offset, PyArena *arena)
{
stmt_ty p;
if (!test) {
PyErr_SetString(PyExc_ValueError,
"field test is required for Assert");
return NULL;
}
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Assert_kind;
p->v.Assert.test = test;
p->v.Assert.msg = msg;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Import(asdl_seq * names, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Import_kind;
p->v.Import.names = names;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
ImportFrom(identifier module, asdl_seq * names, int level, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = ImportFrom_kind;
p->v.ImportFrom.module = module;
p->v.ImportFrom.names = names;
p->v.ImportFrom.level = level;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Global(asdl_seq * names, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Global_kind;
p->v.Global.names = names;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Nonlocal(asdl_seq * names, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Nonlocal_kind;
p->v.Nonlocal.names = names;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Expr(expr_ty value, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
stmt_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for Expr");
return NULL;
}
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Expr_kind;
p->v.Expr.value = value;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Pass(int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena
*arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Pass_kind;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Break(int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena
*arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Break_kind;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
stmt_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Continue(int lineno, int col_offset, int end_lineno, int end_col_offset,
PyArena *arena)
{
stmt_ty p;
p = (stmt_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Continue_kind;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
BoolOp(boolop_ty op, asdl_seq * values, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!op) {
PyErr_SetString(PyExc_ValueError,
"field op is required for BoolOp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = BoolOp_kind;
p->v.BoolOp.op = op;
p->v.BoolOp.values = values;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
bpo-35224: PEP 572 Implementation (#10497) * Add tokenization of := - Add token to Include/token.h. Add token to documentation in Doc/library/token.rst. - Run `./python Lib/token.py` to regenerate Lib/token.py. - Update Parser/tokenizer.c: add case to handle `:=`. * Add initial usage of := in grammar. * Update Python.asdl to match the grammar updates. Regenerated Include/Python-ast.h and Python/Python-ast.c * Update AST and compiler files in Python/ast.c and Python/compile.c. Basic functionality, this isn't scoped properly * Regenerate Lib/symbol.py using `./python Lib/symbol.py` * Tests - Fix failing tests in test_parser.py due to changes in token numbers for internal representation * Tests - Add simple test for := token * Tests - Add simple tests for named expressions using expr and suite * Tests - Update number of levels for nested expressions to prevent stack overflow * Update symbol table to handle NamedExpr * Update Grammar to allow assignment expressions in if statements. Regenerate Python/graminit.c accordingly using `make regen-grammar` * Tests - Add additional tests for named expressions in RoundtripLegalSyntaxTestCase, based on examples and information directly from PEP 572 Note: failing tests are currently commented out (4 out of 24 tests currently fail) * Tests - Add temporary syntax test failure tests in test_parser.py Note: There is an outstanding TODO for this -- syntax tests need to be moved to a different file (presumably test_syntax.py), but this is covering what needs to be tested at the moment, and it's more convenient to run a single test for the time being * Add support for allowing assignment expressions as function argument annotations. Uncomment tests for these cases because they all pass now! * Tests - Move existing syntax tests out of test_parser.py and into test_named_expressions.py. 
Refactor syntax tests to use unittest * Add TargetScopeError exception to extend SyntaxError Note: This simply creates the TargetScopeError exception, it is not yet used anywhere * Tests - Update tests per PEP 572 Continue refactoring test suite: The named expression test suite now checks for any invalid cases that throw exceptions (no longer limited to SyntaxErrors), assignment tests to ensure that variables are properly assigned, and scope tests to ensure that variable availability and values are correct Note: - There are still tests that are marked to skip, as they are not yet implemented - There are approximately 300 lines of the PEP that have not yet been addressed, though these may be deferred * Documentation - Small updates to XXX/todo comments - Remove XXX from child description in ast.c - Add comment with number of previously supported nested expressions for 3.7.X in test_parser.py * Fix assert in seq_for_testlist() * Cleanup - Denote "Not implemented -- No keyword args" on failing test case. Fix PEP8 error for blank lines at beginning of test classes in test_parser.py * Tests - Wrap all file opens in `with...as` to ensure files are closed * WIP: handle f(a := 1) * Tests and Cleanup - No longer skips keyword arg test. Keyword arg test now uses a simpler test case and does not rely on an external file. Remove print statements from ast.c * Tests - Refactor last remaining test case that relied on an external file to use a simpler test case without the dependency * Tests - Add better description of remaining skipped tests. Add test checking scope when using assignment expression in a function argument * Tests - Add test for nested comprehension, testing value and scope. Fix variable name in skipped comprehension scope test * Handle restriction of LHS for named expressions - can only assign to LHS of type NAME. Specifically, restrict assignment to tuples This adds an alternative set_context specifically for named expressions, set_namedexpr_context. 
Thus, context is now set differently for standard assignment versus assignment for named expressions in order to handle restrictions. * Tests - Update negative test case for assigning to lambda to match new error message. Add negative test case for assigning to tuple * Tests - Reorder test cases to group invalid syntax cases and named assignment target errors * Tests - Update test case for named expression in function argument - check that result and variable are set correctly * Todo - Add todo for TargetScopeError based on Guido's comment (https://github.com/python/cpython/commit/2b3acd37bdfc2d35e5094228c6684050d2aa8b0a#r30472562) * Tests - Add named expression tests for assignment operator in function arguments Note: One of two tests is skipped, as function arguments are currently treating an assignment expression inside of parentheses as one child, which does not properly catch the named expression, nor does it count arguments properly * Add NamedStore to expr_context. Regenerate related code with `make regen-ast` * Add usage of NamedStore to ast_for_named_expr in ast.c. Update occurrences of checking for Store to also handle NamedStore where appropriate * Add ste_comprehension to _symtable_entry to track if the namespace is a comprehension. Initialize ste_comprehension to 0. Set ste_comprehension to 1 in symtable_handle_comprehension * s/symtable_add_def/symtable_add_def_helper. Add symtable_add_def to handle grabbing st->st_cur and passing it to symtable_add_def_helper. This now allows us to call the original code from symtable_add_def by instead calling symtable_add_def_helper with a different ste. * Refactor symtable_record_directive to take lineno and col_offset as arguments instead of stmt_ty. This allows symtable_record_directive to be used for stmt_ty and expr_ty * Handle elevating scope for named expressions in comprehensions. * Handle error for usage of named expression inside a class block * Tests - No longer skip scope tests. 
Add additional scope tests * Cleanup - Update error message for named expression within a comprehension within a class. Update comments. Add assert for symtable_extend_namedexpr_scope to validate that we always find at least a ModuleScope if we don't find a Class or FunctionScope * Cleanup - Add missing case for NamedStore in expr_context_name. Remove unused var in set_namedexpr_content * Refactor - Consolidate set_context and set_namedexpr_context to reduce duplicated code. Special cases for named expressions are handled by checking if ctx is NamedStore * Cleanup - Add additional use cases for ast_for_namedexpr in usage comment. Fix multiple blank lines in test_named_expressions * Tests - Remove unnecessary test case. Renumber test case function names * Remove TargetScopeError for now. Will add back if needed * Cleanup - Small comment nit for consistency * Handle positional argument check with named expression * Add TargetScopeError exception definition. Add documentation for TargetScopeError in c-api docs. Throw TargetScopeError instead of SyntaxError when using a named expression in a comprehension within a class scope * Increase stack size for parser by 200. This is a minimal change (approx. 5kb) and should not have an impact on any systems. 
Update parser test to allow 99 nested levels again * Add TargetScopeError to exception_hierarchy.txt for test_baseexception.py * Tests - Major update for named expression tests, both in test_named_expressions and test_parser - Add test for TargetScopeError - Add tests for named expressions in comprehension scope and edge cases - Add tests for named expressions in function arguments (declarations and call sites) - Reorganize tests to group them more logically * Cleanup - Remove unnecessary comment * Cleanup - Comment nitpicks * Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0) - Add check for LHS types to detect a parenthesis then a name (see note) - Add test for this scenario - Update tests for changed error message for named assignment to a tuple (also, see note) Note: This caused issues with the previous error handling for named assignment to a LHS that contained an expression, such as a tuple. Thus, the check for the LHS of a named expression must be changed to be more specific if we wish to maintain the previous error messages * Cleanup - Wrap lines more strictly in test file * Revert "Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0)" This reverts commit f1531400ca7d7a2d148830c8ac703f041740896d. * Add NEWS.d entry * Tests - Fix error in test_pickle.test_exceptions by adding TargetScopeError to list of exceptions * Tests - Update error message tests to reflect improved messaging convention (s/can't/cannot) * Remove cases that cannot be reached in compile.c. Small linting update. * Update Grammar/Tokens to add COLONEQUAL. Regenerate all files * Update TargetScopeError PRE_INIT and POST_INIT, as this was purposefully left out when fixing rebase conflicts * Add NamedStore back and regenerate files * Pass along line number and end col info for named expression * Simplify News entry * Fix compiler warning and explicitly mark fallthrough
2019-01-25 07:49:56 +08:00
/*
 * Create a NamedExpr expression node.
 *
 * Both `target` and `value` are mandatory. They are checked in that order
 * and only the first missing field is reported, as a ValueError. The four
 * position arguments are copied into the node unchanged.
 *
 * Returns the node allocated from `arena`, or NULL on a missing field or
 * when PyArena_Malloc() fails.
 */
expr_ty
NamedExpr(expr_ty target, expr_ty value, int lineno, int col_offset, int
          end_lineno, int end_col_offset, PyArena *arena)
{
    const char *missing = NULL;
    expr_ty node;

    /* Pick the message for the first absent required field, if any. */
    if (target == NULL) {
        missing = "field target is required for NamedExpr";
    }
    else if (value == NULL) {
        missing = "field value is required for NamedExpr";
    }
    if (missing != NULL) {
        PyErr_SetString(PyExc_ValueError, missing);
        return NULL;
    }

    node = (expr_ty)PyArena_Malloc(arena, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }

    /* Source span first, then the variant tag and its payload. */
    node->lineno = lineno;
    node->col_offset = col_offset;
    node->end_lineno = end_lineno;
    node->end_col_offset = end_col_offset;
    node->kind = NamedExpr_kind;
    node->v.NamedExpr.target = target;
    node->v.NamedExpr.value = value;
    return node;
}
expr_ty
BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno, int col_offset,
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!left) {
PyErr_SetString(PyExc_ValueError,
"field left is required for BinOp");
return NULL;
}
if (!op) {
PyErr_SetString(PyExc_ValueError,
"field op is required for BinOp");
return NULL;
}
if (!right) {
PyErr_SetString(PyExc_ValueError,
"field right is required for BinOp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = BinOp_kind;
p->v.BinOp.left = left;
p->v.BinOp.op = op;
p->v.BinOp.right = right;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
UnaryOp(unaryop_ty op, expr_ty operand, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!op) {
PyErr_SetString(PyExc_ValueError,
"field op is required for UnaryOp");
return NULL;
}
if (!operand) {
PyErr_SetString(PyExc_ValueError,
"field operand is required for UnaryOp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = UnaryOp_kind;
p->v.UnaryOp.op = op;
p->v.UnaryOp.operand = operand;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Lambda(arguments_ty args, expr_ty body, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!args) {
PyErr_SetString(PyExc_ValueError,
"field args is required for Lambda");
return NULL;
}
if (!body) {
PyErr_SetString(PyExc_ValueError,
"field body is required for Lambda");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Lambda_kind;
p->v.Lambda.args = args;
p->v.Lambda.body = body;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
IfExp(expr_ty test, expr_ty body, expr_ty orelse, int lineno, int col_offset,
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!test) {
PyErr_SetString(PyExc_ValueError,
"field test is required for IfExp");
return NULL;
}
if (!body) {
PyErr_SetString(PyExc_ValueError,
"field body is required for IfExp");
return NULL;
}
if (!orelse) {
PyErr_SetString(PyExc_ValueError,
"field orelse is required for IfExp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = IfExp_kind;
p->v.IfExp.test = test;
p->v.IfExp.body = body;
p->v.IfExp.orelse = orelse;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Dict(asdl_seq * keys, asdl_seq * values, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Dict_kind;
p->v.Dict.keys = keys;
p->v.Dict.values = values;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Set(asdl_seq * elts, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
expr_ty p;
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Set_kind;
p->v.Set.elts = elts;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
ListComp(expr_ty elt, asdl_seq * generators, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!elt) {
PyErr_SetString(PyExc_ValueError,
"field elt is required for ListComp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = ListComp_kind;
p->v.ListComp.elt = elt;
p->v.ListComp.generators = generators;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
SetComp(expr_ty elt, asdl_seq * generators, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!elt) {
PyErr_SetString(PyExc_ValueError,
"field elt is required for SetComp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = SetComp_kind;
p->v.SetComp.elt = elt;
p->v.SetComp.generators = generators;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
Merged revisions 56154-56264 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r56155 | neal.norwitz | 2007-07-03 08:59:08 +0300 (Tue, 03 Jul 2007) | 1 line Get this test working after converting map to return an iterator ................ r56202 | neal.norwitz | 2007-07-09 04:30:09 +0300 (Mon, 09 Jul 2007) | 37 lines Merged revisions 56124-56201 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r56129 | georg.brandl | 2007-06-30 04:01:01 -0700 (Sat, 30 Jun 2007) | 2 lines Document smtp.SMTPAuthenticationError. ........ r56137 | georg.brandl | 2007-07-01 01:11:35 -0700 (Sun, 01 Jul 2007) | 2 lines Fix a few webbrowser.py problems. ........ r56143 | georg.brandl | 2007-07-02 04:54:28 -0700 (Mon, 02 Jul 2007) | 2 lines Remove duplicate sentence from alarm() doc. ........ r56170 | mark.hammond | 2007-07-03 19:03:10 -0700 (Tue, 03 Jul 2007) | 3 lines copy built files to the PCBuild directory, where tools like distutils or external build processes can find them. ........ r56176 | kurt.kaiser | 2007-07-05 15:03:39 -0700 (Thu, 05 Jul 2007) | 10 lines Many calls to tk.call involve an arglist containing a single tuple. Calls using METH_OLDARGS unpack this tuple; calls using METH_VARARG don't. Tcl's concatenation of args was affected; IDLE doesn't start. Modify Tkapp_Call() to unpack single tuple arglists. Bug 1733943 Ref http://mail.python.org/pipermail/python-checkins/2007-May/060454.html ........ r56177 | neal.norwitz | 2007-07-05 21:13:39 -0700 (Thu, 05 Jul 2007) | 1 line Fix typo in comment ........ ................ r56251 | neal.norwitz | 2007-07-11 10:01:01 +0300 (Wed, 11 Jul 2007) | 1 line Get working with map returning an iterator (had to fix whitespace too) ................ 
r56255 | thomas.wouters | 2007-07-11 13:41:37 +0300 (Wed, 11 Jul 2007) | 6 lines Clean up merge glitch or copy-paste error (the entire module was duplicated, except the first half even had some more copy-paste errors, referring to listcomps and genexps instead of setcomps) ................ r56256 | thomas.wouters | 2007-07-11 15:16:01 +0300 (Wed, 11 Jul 2007) | 14 lines Dict comprehensions. Still needs doc changes (like many python-3000 features ;-). It generates bytecode similar to: x = {} for k, v in (generator here): x[k] = v except there is no tuple-packing and -unpacking involved. Trivial measurement suggests it's significantly faster than dict(generator here) (in the order of 2 to 3 times as fast) but I have not done extensive measurements. ................ r56263 | guido.van.rossum | 2007-07-11 15:36:26 +0300 (Wed, 11 Jul 2007) | 3 lines Patch 1724999 by Ali Gholami Rudi -- avoid complaints about dict size change during iter in destroy call. ................
2007-07-11 21:09:30 +08:00
expr_ty
DictComp(expr_ty key, expr_ty value, asdl_seq * generators, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
Merged revisions 56154-56264 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r56155 | neal.norwitz | 2007-07-03 08:59:08 +0300 (Tue, 03 Jul 2007) | 1 line Get this test working after converting map to return an iterator ................ r56202 | neal.norwitz | 2007-07-09 04:30:09 +0300 (Mon, 09 Jul 2007) | 37 lines Merged revisions 56124-56201 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r56129 | georg.brandl | 2007-06-30 04:01:01 -0700 (Sat, 30 Jun 2007) | 2 lines Document smtp.SMTPAuthenticationError. ........ r56137 | georg.brandl | 2007-07-01 01:11:35 -0700 (Sun, 01 Jul 2007) | 2 lines Fix a few webbrowser.py problems. ........ r56143 | georg.brandl | 2007-07-02 04:54:28 -0700 (Mon, 02 Jul 2007) | 2 lines Remove duplicate sentence from alarm() doc. ........ r56170 | mark.hammond | 2007-07-03 19:03:10 -0700 (Tue, 03 Jul 2007) | 3 lines copy built files to the PCBuild directory, where tools like distutils or external build processes can find them. ........ r56176 | kurt.kaiser | 2007-07-05 15:03:39 -0700 (Thu, 05 Jul 2007) | 10 lines Many calls to tk.call involve an arglist containing a single tuple. Calls using METH_OLDARGS unpack this tuple; calls using METH_VARARG don't. Tcl's concatenation of args was affected; IDLE doesn't start. Modify Tkapp_Call() to unpack single tuple arglists. Bug 1733943 Ref http://mail.python.org/pipermail/python-checkins/2007-May/060454.html ........ r56177 | neal.norwitz | 2007-07-05 21:13:39 -0700 (Thu, 05 Jul 2007) | 1 line Fix typo in comment ........ ................ r56251 | neal.norwitz | 2007-07-11 10:01:01 +0300 (Wed, 11 Jul 2007) | 1 line Get working with map returning an iterator (had to fix whitespace too) ................ 
r56255 | thomas.wouters | 2007-07-11 13:41:37 +0300 (Wed, 11 Jul 2007) | 6 lines Clean up merge glitch or copy-paste error (the entire module was duplicated, except the first half even had some more copy-paste errors, referring to listcomps and genexps instead of setcomps) ................ r56256 | thomas.wouters | 2007-07-11 15:16:01 +0300 (Wed, 11 Jul 2007) | 14 lines Dict comprehensions. Still needs doc changes (like many python-3000 features ;-). It generates bytecode similar to: x = {} for k, v in (generator here): x[k] = v except there is no tuple-packing and -unpacking involved. Trivial measurement suggests it's significantly faster than dict(generator here) (in the order of 2 to 3 times as fast) but I have not done extensive measurements. ................ r56263 | guido.van.rossum | 2007-07-11 15:36:26 +0300 (Wed, 11 Jul 2007) | 3 lines Patch 1724999 by Ali Gholami Rudi -- avoid complaints about dict size change during iter in destroy call. ................
2007-07-11 21:09:30 +08:00
{
expr_ty p;
if (!key) {
PyErr_SetString(PyExc_ValueError,
"field key is required for DictComp");
return NULL;
}
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for DictComp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = DictComp_kind;
p->v.DictComp.key = key;
p->v.DictComp.value = value;
p->v.DictComp.generators = generators;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
Merged revisions 56154-56264 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r56155 | neal.norwitz | 2007-07-03 08:59:08 +0300 (Tue, 03 Jul 2007) | 1 line Get this test working after converting map to return an iterator ................ r56202 | neal.norwitz | 2007-07-09 04:30:09 +0300 (Mon, 09 Jul 2007) | 37 lines Merged revisions 56124-56201 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r56129 | georg.brandl | 2007-06-30 04:01:01 -0700 (Sat, 30 Jun 2007) | 2 lines Document smtp.SMTPAuthenticationError. ........ r56137 | georg.brandl | 2007-07-01 01:11:35 -0700 (Sun, 01 Jul 2007) | 2 lines Fix a few webbrowser.py problems. ........ r56143 | georg.brandl | 2007-07-02 04:54:28 -0700 (Mon, 02 Jul 2007) | 2 lines Remove duplicate sentence from alarm() doc. ........ r56170 | mark.hammond | 2007-07-03 19:03:10 -0700 (Tue, 03 Jul 2007) | 3 lines copy built files to the PCBuild directory, where tools like distutils or external build processes can find them. ........ r56176 | kurt.kaiser | 2007-07-05 15:03:39 -0700 (Thu, 05 Jul 2007) | 10 lines Many calls to tk.call involve an arglist containing a single tuple. Calls using METH_OLDARGS unpack this tuple; calls using METH_VARARG don't. Tcl's concatenation of args was affected; IDLE doesn't start. Modify Tkapp_Call() to unpack single tuple arglists. Bug 1733943 Ref http://mail.python.org/pipermail/python-checkins/2007-May/060454.html ........ r56177 | neal.norwitz | 2007-07-05 21:13:39 -0700 (Thu, 05 Jul 2007) | 1 line Fix typo in comment ........ ................ r56251 | neal.norwitz | 2007-07-11 10:01:01 +0300 (Wed, 11 Jul 2007) | 1 line Get working with map returning an iterator (had to fix whitespace too) ................ 
r56255 | thomas.wouters | 2007-07-11 13:41:37 +0300 (Wed, 11 Jul 2007) | 6 lines Clean up merge glitch or copy-paste error (the entire module was duplicated, except the first half even had some more copy-paste errors, referring to listcomps and genexps instead of setcomps) ................ r56256 | thomas.wouters | 2007-07-11 15:16:01 +0300 (Wed, 11 Jul 2007) | 14 lines Dict comprehensions. Still needs doc changes (like many python-3000 features ;-). It generates bytecode similar to: x = {} for k, v in (generator here): x[k] = v except there is no tuple-packing and -unpacking involved. Trivial measurement suggests it's significantly faster than dict(generator here) (in the order of 2 to 3 times as fast) but I have not done extensive measurements. ................ r56263 | guido.van.rossum | 2007-07-11 15:36:26 +0300 (Wed, 11 Jul 2007) | 3 lines Patch 1724999 by Ali Gholami Rudi -- avoid complaints about dict size change during iter in destroy call. ................
2007-07-11 21:09:30 +08:00
}
expr_ty
GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno, int col_offset,
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!elt) {
PyErr_SetString(PyExc_ValueError,
"field elt is required for GeneratorExp");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = GeneratorExp_kind;
p->v.GeneratorExp.elt = elt;
p->v.GeneratorExp.generators = generators;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Await(expr_ty value, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
expr_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for Await");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Await_kind;
p->v.Await.value = value;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Yield(expr_ty value, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
expr_ty p;
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Yield_kind;
p->v.Yield.value = value;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
YieldFrom(expr_ty value, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
expr_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for YieldFrom");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = YieldFrom_kind;
p->v.YieldFrom.value = value;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators, int lineno,
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!left) {
PyErr_SetString(PyExc_ValueError,
"field left is required for Compare");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Compare_kind;
p->v.Compare.left = left;
p->v.Compare.ops = ops;
p->v.Compare.comparators = comparators;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!func) {
PyErr_SetString(PyExc_ValueError,
"field func is required for Call");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Call_kind;
p->v.Call.func = func;
p->v.Call.args = args;
p->v.Call.keywords = keywords;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
FormattedValue(expr_ty value, int conversion, expr_ty format_spec, int lineno,
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int col_offset, int end_lineno, int end_col_offset, PyArena
*arena)
{
expr_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for FormattedValue");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = FormattedValue_kind;
p->v.FormattedValue.value = value;
p->v.FormattedValue.conversion = conversion;
p->v.FormattedValue.format_spec = format_spec;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
JoinedStr(asdl_seq * values, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
expr_ty p;
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = JoinedStr_kind;
p->v.JoinedStr.values = values;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Constant(constant value, int lineno, int col_offset, int end_lineno, int
end_col_offset, PyArena *arena)
{
expr_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for Constant");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Constant_kind;
p->v.Constant.value = value;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for Attribute");
return NULL;
}
if (!attr) {
PyErr_SetString(PyExc_ValueError,
"field attr is required for Attribute");
return NULL;
}
if (!ctx) {
PyErr_SetString(PyExc_ValueError,
"field ctx is required for Attribute");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Attribute_kind;
p->v.Attribute.value = value;
p->v.Attribute.attr = attr;
p->v.Attribute.ctx = ctx;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for Subscript");
return NULL;
}
if (!slice) {
PyErr_SetString(PyExc_ValueError,
"field slice is required for Subscript");
return NULL;
}
if (!ctx) {
PyErr_SetString(PyExc_ValueError,
"field ctx is required for Subscript");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Subscript_kind;
p->v.Subscript.value = value;
p->v.Subscript.slice = slice;
p->v.Subscript.ctx = ctx;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
Merged revisions 55225-55227,55229-55269 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r55238 | guido.van.rossum | 2007-05-10 16:46:05 -0700 (Thu, 10 May 2007) | 9 lines Merged revisions 55227 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r55227 | guido.van.rossum | 2007-05-10 10:20:15 -0700 (Thu, 10 May 2007) | 2 lines Fix a bug in test_c_api() that caused a negative refcount. ........ ................ r55246 | neal.norwitz | 2007-05-11 00:01:52 -0700 (Fri, 11 May 2007) | 1 line Remove commands.getstatus() it is obsolete. ................ r55248 | neal.norwitz | 2007-05-11 00:29:05 -0700 (Fri, 11 May 2007) | 2 lines Remove bsddb185 support. ................ r55249 | neal.norwitz | 2007-05-11 00:29:50 -0700 (Fri, 11 May 2007) | 1 line Remove bsddb185 module too ................ r55250 | neal.norwitz | 2007-05-11 00:32:13 -0700 (Fri, 11 May 2007) | 1 line bsddb185: Gotta remove from the file checked in, not Setup ................ r55251 | neal.norwitz | 2007-05-11 00:53:26 -0700 (Fri, 11 May 2007) | 1 line Remove obsolete IRIX modules (as much as I could find, there is probably more) ................ r55252 | neal.norwitz | 2007-05-11 00:55:35 -0700 (Fri, 11 May 2007) | 1 line Remove SGI turd. ................ r55254 | georg.brandl | 2007-05-11 03:11:01 -0700 (Fri, 11 May 2007) | 2 lines Add a case for set comprehensions to the "cannot assign to" switch. ................ r55255 | georg.brandl | 2007-05-11 03:11:25 -0700 (Fri, 11 May 2007) | 2 lines Fix wrong imports. ................ r55261 | georg.brandl | 2007-05-11 07:37:48 -0700 (Fri, 11 May 2007) | 2 lines Remove removed tex files. ................ r55262 | georg.brandl | 2007-05-11 08:28:41 -0700 (Fri, 11 May 2007) | 2 lines Commit PEP 3132 implementation. ................ r55264 | georg.brandl | 2007-05-11 08:50:19 -0700 (Fri, 11 May 2007) | 2 lines Check in the inevitable AST version number and format Py_ssize_t with %zd. 
................ r55265 | neal.norwitz | 2007-05-11 09:12:22 -0700 (Fri, 11 May 2007) | 1 line Remove mention of os.popen* and popen2.* since these will be removed. ................ r55266 | neal.norwitz | 2007-05-11 09:19:57 -0700 (Fri, 11 May 2007) | 1 line Get doc to build again (almost, the doc is fine) ................ r55267 | neal.norwitz | 2007-05-11 09:21:02 -0700 (Fri, 11 May 2007) | 1 line Really get doc to build (remove use of string module) ................ r55269 | neal.norwitz | 2007-05-11 09:29:43 -0700 (Fri, 11 May 2007) | 1 line Add some notes to cleanup later ................
2007-05-12 00:50:42 +08:00
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Starred(expr_ty value, expr_context_ty ctx, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
Merged revisions 55225-55227,55229-55269 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r55238 | guido.van.rossum | 2007-05-10 16:46:05 -0700 (Thu, 10 May 2007) | 9 lines Merged revisions 55227 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r55227 | guido.van.rossum | 2007-05-10 10:20:15 -0700 (Thu, 10 May 2007) | 2 lines Fix a bug in test_c_api() that caused a negative refcount. ........ ................ r55246 | neal.norwitz | 2007-05-11 00:01:52 -0700 (Fri, 11 May 2007) | 1 line Remove commands.getstatus() it is obsolete. ................ r55248 | neal.norwitz | 2007-05-11 00:29:05 -0700 (Fri, 11 May 2007) | 2 lines Remove bsddb185 support. ................ r55249 | neal.norwitz | 2007-05-11 00:29:50 -0700 (Fri, 11 May 2007) | 1 line Remove bsddb185 module too ................ r55250 | neal.norwitz | 2007-05-11 00:32:13 -0700 (Fri, 11 May 2007) | 1 line bsddb185: Gotta remove from the file checked in, not Setup ................ r55251 | neal.norwitz | 2007-05-11 00:53:26 -0700 (Fri, 11 May 2007) | 1 line Remove obsolete IRIX modules (as much as I could find, there is probably more) ................ r55252 | neal.norwitz | 2007-05-11 00:55:35 -0700 (Fri, 11 May 2007) | 1 line Remove SGI turd. ................ r55254 | georg.brandl | 2007-05-11 03:11:01 -0700 (Fri, 11 May 2007) | 2 lines Add a case for set comprehensions to the "cannot assign to" switch. ................ r55255 | georg.brandl | 2007-05-11 03:11:25 -0700 (Fri, 11 May 2007) | 2 lines Fix wrong imports. ................ r55261 | georg.brandl | 2007-05-11 07:37:48 -0700 (Fri, 11 May 2007) | 2 lines Remove removed tex files. ................ r55262 | georg.brandl | 2007-05-11 08:28:41 -0700 (Fri, 11 May 2007) | 2 lines Commit PEP 3132 implementation. ................ r55264 | georg.brandl | 2007-05-11 08:50:19 -0700 (Fri, 11 May 2007) | 2 lines Check in the inevitable AST version number and format Py_ssize_t with %zd. 
................ r55265 | neal.norwitz | 2007-05-11 09:12:22 -0700 (Fri, 11 May 2007) | 1 line Remove mention of os.popen* and popen2.* since these will be removed. ................ r55266 | neal.norwitz | 2007-05-11 09:19:57 -0700 (Fri, 11 May 2007) | 1 line Get doc to build again (almost, the doc is fine) ................ r55267 | neal.norwitz | 2007-05-11 09:21:02 -0700 (Fri, 11 May 2007) | 1 line Really get doc to build (remove use of string module) ................ r55269 | neal.norwitz | 2007-05-11 09:29:43 -0700 (Fri, 11 May 2007) | 1 line Add some notes to cleanup later ................
2007-05-12 00:50:42 +08:00
{
expr_ty p;
if (!value) {
PyErr_SetString(PyExc_ValueError,
"field value is required for Starred");
return NULL;
}
if (!ctx) {
PyErr_SetString(PyExc_ValueError,
"field ctx is required for Starred");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Starred_kind;
p->v.Starred.value = value;
p->v.Starred.ctx = ctx;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
Merged revisions 55225-55227,55229-55269 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r55238 | guido.van.rossum | 2007-05-10 16:46:05 -0700 (Thu, 10 May 2007) | 9 lines Merged revisions 55227 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r55227 | guido.van.rossum | 2007-05-10 10:20:15 -0700 (Thu, 10 May 2007) | 2 lines Fix a bug in test_c_api() that caused a negative refcount. ........ ................ r55246 | neal.norwitz | 2007-05-11 00:01:52 -0700 (Fri, 11 May 2007) | 1 line Remove commands.getstatus() it is obsolete. ................ r55248 | neal.norwitz | 2007-05-11 00:29:05 -0700 (Fri, 11 May 2007) | 2 lines Remove bsddb185 support. ................ r55249 | neal.norwitz | 2007-05-11 00:29:50 -0700 (Fri, 11 May 2007) | 1 line Remove bsddb185 module too ................ r55250 | neal.norwitz | 2007-05-11 00:32:13 -0700 (Fri, 11 May 2007) | 1 line bsddb185: Gotta remove from the file checked in, not Setup ................ r55251 | neal.norwitz | 2007-05-11 00:53:26 -0700 (Fri, 11 May 2007) | 1 line Remove obsolete IRIX modules (as much as I could find, there is probably more) ................ r55252 | neal.norwitz | 2007-05-11 00:55:35 -0700 (Fri, 11 May 2007) | 1 line Remove SGI turd. ................ r55254 | georg.brandl | 2007-05-11 03:11:01 -0700 (Fri, 11 May 2007) | 2 lines Add a case for set comprehensions to the "cannot assign to" switch. ................ r55255 | georg.brandl | 2007-05-11 03:11:25 -0700 (Fri, 11 May 2007) | 2 lines Fix wrong imports. ................ r55261 | georg.brandl | 2007-05-11 07:37:48 -0700 (Fri, 11 May 2007) | 2 lines Remove removed tex files. ................ r55262 | georg.brandl | 2007-05-11 08:28:41 -0700 (Fri, 11 May 2007) | 2 lines Commit PEP 3132 implementation. ................ r55264 | georg.brandl | 2007-05-11 08:50:19 -0700 (Fri, 11 May 2007) | 2 lines Check in the inevitable AST version number and format Py_ssize_t with %zd. 
................ r55265 | neal.norwitz | 2007-05-11 09:12:22 -0700 (Fri, 11 May 2007) | 1 line Remove mention of os.popen* and popen2.* since these will be removed. ................ r55266 | neal.norwitz | 2007-05-11 09:19:57 -0700 (Fri, 11 May 2007) | 1 line Get doc to build again (almost, the doc is fine) ................ r55267 | neal.norwitz | 2007-05-11 09:21:02 -0700 (Fri, 11 May 2007) | 1 line Really get doc to build (remove use of string module) ................ r55269 | neal.norwitz | 2007-05-11 09:29:43 -0700 (Fri, 11 May 2007) | 1 line Add some notes to cleanup later ................
2007-05-12 00:50:42 +08:00
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Name(identifier id, expr_context_ty ctx, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!id) {
PyErr_SetString(PyExc_ValueError,
"field id is required for Name");
return NULL;
}
if (!ctx) {
PyErr_SetString(PyExc_ValueError,
"field ctx is required for Name");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Name_kind;
p->v.Name.id = id;
p->v.Name.ctx = ctx;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
List(asdl_seq * elts, expr_context_ty ctx, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!ctx) {
PyErr_SetString(PyExc_ValueError,
"field ctx is required for List");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = List_kind;
p->v.List.elts = elts;
p->v.List.ctx = ctx;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
expr_ty
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno, int col_offset, int
end_lineno, int end_col_offset, PyArena *arena)
{
expr_ty p;
if (!ctx) {
PyErr_SetString(PyExc_ValueError,
"field ctx is required for Tuple");
return NULL;
}
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = Tuple_kind;
p->v.Tuple.elts = elts;
p->v.Tuple.ctx = ctx;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
/* Build a Slice node inside `arena`.
 *
 * `lower`, `upper` and `step` are stored exactly as passed; none of them
 * is validated, so any of them may be NULL.
 *
 * Returns the new node, or NULL when PyArena_Malloc() returns NULL.
 */
slice_ty
Slice(expr_ty lower, expr_ty upper, expr_ty step, PyArena *arena)
{
    slice_ty p = (slice_ty)PyArena_Malloc(arena, sizeof(*p));

    if (p == NULL) {
        return NULL;
    }

    p->kind = Slice_kind;
    p->v.Slice.lower = lower;
    p->v.Slice.upper = upper;
    p->v.Slice.step = step;
    return p;
}
/* Build an ExtSlice node inside `arena`.
 *
 * `dims` is stored exactly as passed and is not validated.
 *
 * Returns the new node, or NULL when PyArena_Malloc() returns NULL.
 */
slice_ty
ExtSlice(asdl_seq * dims, PyArena *arena)
{
    slice_ty p = (slice_ty)PyArena_Malloc(arena, sizeof(*p));

    if (p == NULL) {
        return NULL;
    }

    p->kind = ExtSlice_kind;
    p->v.ExtSlice.dims = dims;
    return p;
}
/* Build an Index node inside `arena`.
 *
 * `value` is required: a NULL value sets ValueError and returns NULL
 * before anything is allocated.
 *
 * Returns the new node, or NULL when PyArena_Malloc() returns NULL.
 */
slice_ty
Index(expr_ty value, PyArena *arena)
{
    slice_ty p;

    if (!value) {
        PyErr_SetString(PyExc_ValueError,
                        "field value is required for Index");
        return NULL;
    }

    p = (slice_ty)PyArena_Malloc(arena, sizeof(*p));
    if (p == NULL) {
        return NULL;
    }

    p->kind = Index_kind;
    p->v.Index.value = value;
    return p;
}
/* Build a comprehension clause inside `arena`.
 *
 * `target` and `iter` are required; they are checked in that order and a
 * NULL one sets ValueError and returns NULL.  `ifs` and `is_async` are
 * stored as given without validation.
 *
 * Returns the new record, or NULL when PyArena_Malloc() returns NULL.
 */
comprehension_ty
comprehension(expr_ty target, expr_ty iter, asdl_seq * ifs, int is_async,
              PyArena *arena)
{
    comprehension_ty p;

    if (!target) {
        PyErr_SetString(PyExc_ValueError,
                        "field target is required for comprehension");
        return NULL;
    }
    if (!iter) {
        PyErr_SetString(PyExc_ValueError,
                        "field iter is required for comprehension");
        return NULL;
    }

    p = (comprehension_ty)PyArena_Malloc(arena, sizeof(*p));
    if (p == NULL) {
        return NULL;
    }

    p->target = target;
    p->iter = iter;
    p->ifs = ifs;
    p->is_async = is_async;
    return p;
}
excepthandler_ty
ExceptHandler(expr_ty type, identifier name, asdl_seq * body, int lineno, int
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
excepthandler_ty p;
p = (excepthandler_ty)PyArena_Malloc(arena, sizeof(*p));
if (!p)
return NULL;
p->kind = ExceptHandler_kind;
p->v.ExceptHandler.type = type;
p->v.ExceptHandler.name = name;
p->v.ExceptHandler.body = body;
p->lineno = lineno;
p->col_offset = col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
p->end_lineno = end_lineno;
p->end_col_offset = end_col_offset;
return p;
}
/* Build an arguments record inside `arena`.
 *
 * Every parameter is stored into the field of the same name without any
 * validation.
 *
 * Returns the new record, or NULL when PyArena_Malloc() returns NULL.
 */
arguments_ty
arguments(asdl_seq * args, arg_ty vararg, asdl_seq * kwonlyargs, asdl_seq *
          kw_defaults, arg_ty kwarg, asdl_seq * defaults, PyArena *arena)
{
    arguments_ty p = (arguments_ty)PyArena_Malloc(arena, sizeof(*p));

    if (p == NULL) {
        return NULL;
    }

    p->args = args;
    p->vararg = vararg;
    p->kwonlyargs = kwonlyargs;
    p->kw_defaults = kw_defaults;
    p->kwarg = kwarg;
    p->defaults = defaults;
    return p;
}
/* Build an arg record (one formal parameter) inside `arena`.
 *
 * arg         - parameter name; required (NULL is rejected).
 * annotation, type_comment
 *             - stored as given, not validated here.
 * lineno, col_offset, end_lineno, end_col_offset
 *             - source span copied verbatim into the record.
 *
 * Returns the new record.  Returns NULL with ValueError set when `arg` is
 * NULL, and NULL when PyArena_Malloc() returns NULL (no exception is set
 * by this function on that second path).
 */
arg_ty
arg(identifier arg, expr_ty annotation, string type_comment, int lineno, int
    col_offset, int end_lineno, int end_col_offset, PyArena *arena)
{
    arg_ty p;

    /* Validate before allocating so a bad call costs no arena memory. */
    if (!arg) {
        PyErr_SetString(PyExc_ValueError,
                        "field arg is required for arg");
        return NULL;
    }

    p = (arg_ty)PyArena_Malloc(arena, sizeof(*p));
    if (p == NULL) {
        return NULL;
    }

    p->arg = arg;
    p->annotation = annotation;
    p->type_comment = type_comment;

    /* Start and end position of the parameter in the source. */
    p->lineno = lineno;
    p->col_offset = col_offset;
    p->end_lineno = end_lineno;
    p->end_col_offset = end_col_offset;
    return p;
}
/* Build a keyword record inside `arena`.
 *
 * `value` is required: a NULL value sets ValueError and returns NULL.
 * `arg` is stored as given and is not validated.
 *
 * Returns the new record, or NULL when PyArena_Malloc() returns NULL.
 */
keyword_ty
keyword(identifier arg, expr_ty value, PyArena *arena)
{
    keyword_ty p;

    if (!value) {
        PyErr_SetString(PyExc_ValueError,
                        "field value is required for keyword");
        return NULL;
    }

    p = (keyword_ty)PyArena_Malloc(arena, sizeof(*p));
    if (p == NULL) {
        return NULL;
    }

    p->arg = arg;
    p->value = value;
    return p;
}
/* Build an alias record inside `arena`.
 *
 * `name` is required: a NULL name sets ValueError and returns NULL.
 * `asname` is stored as given and is not validated.
 *
 * Returns the new record, or NULL when PyArena_Malloc() returns NULL.
 */
alias_ty
alias(identifier name, identifier asname, PyArena *arena)
{
    alias_ty p;

    if (!name) {
        PyErr_SetString(PyExc_ValueError,
                        "field name is required for alias");
        return NULL;
    }

    p = (alias_ty)PyArena_Malloc(arena, sizeof(*p));
    if (p == NULL) {
        return NULL;
    }

    p->name = name;
    p->asname = asname;
    return p;
}
/* Build a withitem record inside `arena`.
 *
 * `context_expr` is required: a NULL value sets ValueError and returns
 * NULL.  `optional_vars` is stored as given and is not validated.
 *
 * Returns the new record, or NULL when PyArena_Malloc() returns NULL.
 */
withitem_ty
withitem(expr_ty context_expr, expr_ty optional_vars, PyArena *arena)
{
    withitem_ty p;

    if (!context_expr) {
        PyErr_SetString(PyExc_ValueError,
                        "field context_expr is required for withitem");
        return NULL;
    }

    p = (withitem_ty)PyArena_Malloc(arena, sizeof(*p));
    if (p == NULL) {
        return NULL;
    }

    p->context_expr = context_expr;
    p->optional_vars = optional_vars;
    return p;
}
/* Build a TypeIgnore node inside `arena`.
 *
 * `lineno` is stored as given in the node's TypeIgnore payload.
 *
 * Returns the new node, or NULL when PyArena_Malloc() returns NULL.
 */
type_ignore_ty
TypeIgnore(int lineno, PyArena *arena)
{
    type_ignore_ty p = (type_ignore_ty)PyArena_Malloc(arena, sizeof(*p));

    if (p == NULL) {
        return NULL;
    }

    p->kind = TypeIgnore_kind;
    p->v.TypeIgnore.lineno = lineno;
    return p;
}
/* Convert a mod_ty node into its Python-level AST object.
 *
 * `_o` is interpreted as a mod_ty.  A NULL input yields None.  Otherwise
 * an instance of the type matching o->kind is created with
 * PyType_GenericNew(), and each field of that variant is converted
 * (ast2obj_list / ast2obj_expr) and stored on the instance through the
 * corresponding PyId_* identifier.
 *
 * Returns the populated object, or NULL if object creation, a field
 * conversion, or an attribute assignment fails; on that path both the
 * pending field value and the partially built object are released.
 * A kind that matches no case leaves `result` NULL, and that NULL is
 * returned as-is.
 */
PyObject*
ast2obj_mod(void* _o)
{
    mod_ty o = (mod_ty)_o;
    PyObject *result = NULL, *value = NULL;

    if (o == NULL) {
        Py_RETURN_NONE;
    }

    /* In every case: convert a field, attach it, then drop the local
     * reference held in `value` before moving to the next field. */
    switch (o->kind) {
    case Module_kind:
        result = PyType_GenericNew(Module_type, NULL, NULL);
        if (result == NULL) {
            goto failed;
        }
        value = ast2obj_list(o->v.Module.body, ast2obj_stmt);
        if (value == NULL ||
            _PyObject_SetAttrId(result, &PyId_body, value) == -1) {
            goto failed;
        }
        Py_DECREF(value);
        value = ast2obj_list(o->v.Module.type_ignores, ast2obj_type_ignore);
        if (value == NULL ||
            _PyObject_SetAttrId(result, &PyId_type_ignores, value) == -1) {
            goto failed;
        }
        Py_DECREF(value);
        break;
    case Interactive_kind:
        result = PyType_GenericNew(Interactive_type, NULL, NULL);
        if (result == NULL) {
            goto failed;
        }
        value = ast2obj_list(o->v.Interactive.body, ast2obj_stmt);
        if (value == NULL ||
            _PyObject_SetAttrId(result, &PyId_body, value) == -1) {
            goto failed;
        }
        Py_DECREF(value);
        break;
    case Expression_kind:
        result = PyType_GenericNew(Expression_type, NULL, NULL);
        if (result == NULL) {
            goto failed;
        }
        value = ast2obj_expr(o->v.Expression.body);
        if (value == NULL ||
            _PyObject_SetAttrId(result, &PyId_body, value) == -1) {
            goto failed;
        }
        Py_DECREF(value);
        break;
    case FunctionType_kind:
        result = PyType_GenericNew(FunctionType_type, NULL, NULL);
        if (result == NULL) {
            goto failed;
        }
        value = ast2obj_list(o->v.FunctionType.argtypes, ast2obj_expr);
        if (value == NULL ||
            _PyObject_SetAttrId(result, &PyId_argtypes, value) == -1) {
            goto failed;
        }
        Py_DECREF(value);
        value = ast2obj_expr(o->v.FunctionType.returns);
        if (value == NULL ||
            _PyObject_SetAttrId(result, &PyId_returns, value) == -1) {
            goto failed;
        }
        Py_DECREF(value);
        break;
    case Suite_kind:
        result = PyType_GenericNew(Suite_type, NULL, NULL);
        if (result == NULL) {
            goto failed;
        }
        value = ast2obj_list(o->v.Suite.body, ast2obj_stmt);
        if (value == NULL ||
            _PyObject_SetAttrId(result, &PyId_body, value) == -1) {
            goto failed;
        }
        Py_DECREF(value);
        break;
    }
    return result;

failed:
    /* Either pointer may still be NULL here, hence the X variants. */
    Py_XDECREF(value);
    Py_XDECREF(result);
    return NULL;
}
PyObject*
ast2obj_stmt(void* _o)
{
stmt_ty o = (stmt_ty)_o;
PyObject *result = NULL, *value = NULL;
if (!o) {
Py_RETURN_NONE;
}
switch (o->kind) {
case FunctionDef_kind:
result = PyType_GenericNew(FunctionDef_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_identifier(o->v.FunctionDef.name);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_name, value) == -1)
goto failed;
2006-03-02 08:31:27 +08:00
Py_DECREF(value);
value = ast2obj_arguments(o->v.FunctionDef.args);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_args, value) == -1)
goto failed;
2006-03-02 08:31:27 +08:00
Py_DECREF(value);
value = ast2obj_list(o->v.FunctionDef.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
2006-03-02 08:31:27 +08:00
Py_DECREF(value);
value = ast2obj_list(o->v.FunctionDef.decorator_list, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_decorator_list, value) == -1)
goto failed;
2006-03-02 08:31:27 +08:00
Py_DECREF(value);
value = ast2obj_expr(o->v.FunctionDef.returns);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_returns, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->v.FunctionDef.type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
break;
case AsyncFunctionDef_kind:
result = PyType_GenericNew(AsyncFunctionDef_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_identifier(o->v.AsyncFunctionDef.name);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_name, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_arguments(o->v.AsyncFunctionDef.args);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_args, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.AsyncFunctionDef.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.AsyncFunctionDef.decorator_list,
ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_decorator_list, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.AsyncFunctionDef.returns);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_returns, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->v.AsyncFunctionDef.type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
break;
case ClassDef_kind:
result = PyType_GenericNew(ClassDef_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_identifier(o->v.ClassDef.name);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_name, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.ClassDef.bases, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_bases, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.ClassDef.keywords, ast2obj_keyword);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_keywords, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.ClassDef.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.ClassDef.decorator_list, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_decorator_list, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Return_kind:
result = PyType_GenericNew(Return_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Return.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Delete_kind:
result = PyType_GenericNew(Delete_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Delete.targets, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_targets, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Assign_kind:
result = PyType_GenericNew(Assign_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Assign.targets, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_targets, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.Assign.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->v.Assign.type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
break;
case AugAssign_kind:
result = PyType_GenericNew(AugAssign_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.AugAssign.target);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_target, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_operator(o->v.AugAssign.op);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_op, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.AugAssign.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case AnnAssign_kind:
result = PyType_GenericNew(AnnAssign_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.AnnAssign.target);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_target, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.AnnAssign.annotation);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_annotation, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.AnnAssign.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->v.AnnAssign.simple);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_simple, value) == -1)
goto failed;
Py_DECREF(value);
break;
case For_kind:
result = PyType_GenericNew(For_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.For.target);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_target, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.For.iter);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_iter, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.For.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.For.orelse, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_orelse, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->v.For.type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
break;
case AsyncFor_kind:
result = PyType_GenericNew(AsyncFor_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.AsyncFor.target);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_target, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.AsyncFor.iter);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_iter, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.AsyncFor.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.AsyncFor.orelse, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_orelse, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->v.AsyncFor.type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
break;
case While_kind:
result = PyType_GenericNew(While_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.While.test);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_test, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.While.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.While.orelse, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_orelse, value) == -1)
goto failed;
Py_DECREF(value);
break;
case If_kind:
result = PyType_GenericNew(If_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.If.test);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_test, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.If.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.If.orelse, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_orelse, value) == -1)
goto failed;
Py_DECREF(value);
break;
case With_kind:
result = PyType_GenericNew(With_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.With.items, ast2obj_withitem);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_items, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.With.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->v.With.type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
break;
case AsyncWith_kind:
result = PyType_GenericNew(AsyncWith_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.AsyncWith.items, ast2obj_withitem);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_items, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.AsyncWith.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->v.AsyncWith.type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Raise_kind:
result = PyType_GenericNew(Raise_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Raise.exc);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_exc, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.Raise.cause);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_cause, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Try_kind:
result = PyType_GenericNew(Try_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Try.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.Try.handlers, ast2obj_excepthandler);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_handlers, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.Try.orelse, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_orelse, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.Try.finalbody, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_finalbody, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Assert_kind:
result = PyType_GenericNew(Assert_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Assert.test);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_test, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.Assert.msg);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_msg, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Import_kind:
result = PyType_GenericNew(Import_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Import.names, ast2obj_alias);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_names, value) == -1)
goto failed;
Py_DECREF(value);
break;
case ImportFrom_kind:
result = PyType_GenericNew(ImportFrom_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_identifier(o->v.ImportFrom.module);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_module, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.ImportFrom.names, ast2obj_alias);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_names, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->v.ImportFrom.level);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_level, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Global_kind:
result = PyType_GenericNew(Global_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Global.names, ast2obj_identifier);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_names, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Nonlocal_kind:
result = PyType_GenericNew(Nonlocal_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Nonlocal.names, ast2obj_identifier);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_names, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Expr_kind:
result = PyType_GenericNew(Expr_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Expr.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Pass_kind:
result = PyType_GenericNew(Pass_type, NULL, NULL);
if (!result) goto failed;
break;
case Break_kind:
result = PyType_GenericNew(Break_type, NULL, NULL);
if (!result) goto failed;
break;
case Continue_kind:
result = PyType_GenericNew(Continue_type, NULL, NULL);
if (!result) goto failed;
break;
}
value = ast2obj_int(o->lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
value = ast2obj_int(o->end_lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->end_col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
return result;
failed:
Py_XDECREF(value);
Py_XDECREF(result);
return NULL;
}
PyObject*
ast2obj_expr(void* _o)
{
expr_ty o = (expr_ty)_o;
PyObject *result = NULL, *value = NULL;
if (!o) {
Py_RETURN_NONE;
}
switch (o->kind) {
case BoolOp_kind:
result = PyType_GenericNew(BoolOp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_boolop(o->v.BoolOp.op);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_op, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.BoolOp.values, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_values, value) == -1)
goto failed;
Py_DECREF(value);
break;
bpo-35224: PEP 572 Implementation (#10497) * Add tokenization of := - Add token to Include/token.h. Add token to documentation in Doc/library/token.rst. - Run `./python Lib/token.py` to regenerate Lib/token.py. - Update Parser/tokenizer.c: add case to handle `:=`. * Add initial usage of := in grammar. * Update Python.asdl to match the grammar updates. Regenerated Include/Python-ast.h and Python/Python-ast.c * Update AST and compiler files in Python/ast.c and Python/compile.c. Basic functionality, this isn't scoped properly * Regenerate Lib/symbol.py using `./python Lib/symbol.py` * Tests - Fix failing tests in test_parser.py due to changes in token numbers for internal representation * Tests - Add simple test for := token * Tests - Add simple tests for named expressions using expr and suite * Tests - Update number of levels for nested expressions to prevent stack overflow * Update symbol table to handle NamedExpr * Update Grammar to allow assignment expressions in if statements. Regenerate Python/graminit.c accordingly using `make regen-grammar` * Tests - Add additional tests for named expressions in RoundtripLegalSyntaxTestCase, based on examples and information directly from PEP 572 Note: failing tests are currently commented out (4 out of 24 tests currently fail) * Tests - Add temporary syntax test failure tests in test_parser.py Note: There is an outstanding TODO for this -- syntax tests need to be moved to a different file (presumably test_syntax.py), but this is covering what needs to be tested at the moment, and it's more convenient to run a single test for the time being * Add support for allowing assignment expressions as function argument annotations. Uncomment tests for these cases because they all pass now! * Tests - Move existing syntax tests out of test_parser.py and into test_named_expressions.py. 
Refactor syntax tests to use unittest * Add TargetScopeError exception to extend SyntaxError Note: This simply creates the TargetScopeError exception, it is not yet used anywhere * Tests - Update tests per PEP 572 Continue refactoring test suite: The named expression test suite now checks for any invalid cases that throw exceptions (no longer limited to SyntaxErrors), assignment tests to ensure that variables are properly assigned, and scope tests to ensure that variable availability and values are correct Note: - There are still tests that are marked to skip, as they are not yet implemented - There are approximately 300 lines of the PEP that have not yet been addressed, though these may be deferred * Documentation - Small updates to XXX/todo comments - Remove XXX from child description in ast.c - Add comment with number of previously supported nested expressions for 3.7.X in test_parser.py * Fix assert in seq_for_testlist() * Cleanup - Denote "Not implemented -- No keyword args" on failing test case. Fix PEP8 error for blank lines at beginning of test classes in test_parser.py * Tests - Wrap all file opens in `with...as` to ensure files are closed * WIP: handle f(a := 1) * Tests and Cleanup - No longer skips keyword arg test. Keyword arg test now uses a simpler test case and does not rely on an external file. Remove print statements from ast.c * Tests - Refactor last remaining test case that relied on on external file to use a simpler test case without the dependency * Tests - Add better description of remaning skipped tests. Add test checking scope when using assignment expression in a function argument * Tests - Add test for nested comprehension, testing value and scope. Fix variable name in skipped comprehension scope test * Handle restriction of LHS for named expressions - can only assign to LHS of type NAME. Specifically, restrict assignment to tuples This adds an alternative set_context specifically for named expressions, set_namedexpr_context. 
Thus, context is now set differently for standard assignment versus assignment for named expressions in order to handle restrictions. * Tests - Update negative test case for assigning to lambda to match new error message. Add negative test case for assigning to tuple * Tests - Reorder test cases to group invalid syntax cases and named assignment target errors * Tests - Update test case for named expression in function argument - check that result and variable are set correctly * Todo - Add todo for TargetScopeError based on Guido's comment (https://github.com/python/cpython/commit/2b3acd37bdfc2d35e5094228c6684050d2aa8b0a#r30472562) * Tests - Add named expression tests for assignment operator in function arguments Note: One of two tests are skipped, as function arguments are currently treating an assignment expression inside of parenthesis as one child, which does not properly catch the named expression, nor does it count arguments properly * Add NamedStore to expr_context. Regenerate related code with `make regen-ast` * Add usage of NamedStore to ast_for_named_expr in ast.c. Update occurances of checking for Store to also handle NamedStore where appropriate * Add ste_comprehension to _symtable_entry to track if the namespace is a comprehension. Initialize ste_comprehension to 0. Set set_comprehension to 1 in symtable_handle_comprehension * s/symtable_add_def/symtable_add_def_helper. Add symtable_add_def to handle grabbing st->st_cur and passing it to symtable_add_def_helper. This now allows us to call the original code from symtable_add_def by instead calling symtable_add_def_helper with a different ste. * Refactor symtable_record_directive to take lineno and col_offset as arguments instead of stmt_ty. This allows symtable_record_directive to be used for stmt_ty and expr_ty * Handle elevating scope for named expressions in comprehensions. * Handle error for usage of named expression inside a class block * Tests - No longer skip scope tests. 
Add additional scope tests * Cleanup - Update error message for named expression within a comprehension within a class. Update comments. Add assert for symtable_extend_namedexpr_scope to validate that we always find at least a ModuleScope if we don't find a Class or FunctionScope * Cleanup - Add missing case for NamedStore in expr_context_name. Remove unused var in set_namedexpr_content * Refactor - Consolidate set_context and set_namedexpr_context to reduce duplicated code. Special cases for named expressions are handled by checking if ctx is NamedStore * Cleanup - Add additional use cases for ast_for_namedexpr in usage comment. Fix multiple blank lines in test_named_expressions * Tests - Remove unnecessary test case. Renumber test case function names * Remove TargetScopeError for now. Will add back if needed * Cleanup - Small comment nit for consistency * Handle positional argument check with named expression * Add TargetScopeError exception definition. Add documentation for TargetScopeError in c-api docs. Throw TargetScopeError instead of SyntaxError when using a named expression in a comprehension within a class scope * Increase stack size for parser by 200. This is a minimal change (approx. 5kb) and should not have an impact on any systems. 
Update parser test to allow 99 nested levels again * Add TargetScopeError to exception_hierarchy.txt for test_baseexception.py_ * Tests - Major update for named expression tests, both in test_named_expressions and test_parser - Add test for TargetScopeError - Add tests for named expressions in comprehension scope and edge cases - Add tests for named expressions in function arguments (declarations and call sites) - Reorganize tests to group them more logically * Cleanup - Remove unnecessary comment * Cleanup - Comment nitpicks * Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0) - Add check for LHS types to detect a parenthesis then a name (see note) - Add test for this scenario - Update tests for changed error message for named assignment to a tuple (also, see note) Note: This caused issues with the previous error handling for named assignment to a LHS that contained an expression, such as a tuple. Thus, the check for the LHS of a named expression must be changed to be more specific if we wish to maintain the previous error messages * Cleanup - Wrap lines more strictly in test file * Revert "Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0)" This reverts commit f1531400ca7d7a2d148830c8ac703f041740896d. * Add NEWS.d entry * Tests - Fix error in test_pickle.test_exceptions by adding TargetScopeError to list of exceptions * Tests - Update error message tests to reflect improved messaging convention (s/can't/cannot) * Remove cases that cannot be reached in compile.c. Small linting update. * Update Grammar/Tokens to add COLONEQUAL. Regenerate all files * Update TargetScopeError PRE_INIT and POST_INIT, as this was purposefully left out when fixing rebase conflicts * Add NamedStore back and regenerate files * Pass along line number and end col info for named expression * Simplify News entry * Fix compiler warning and explicity mark fallthrough
2019-01-25 07:49:56 +08:00
case NamedExpr_kind:
result = PyType_GenericNew(NamedExpr_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.NamedExpr.target);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_target, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.NamedExpr.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case BinOp_kind:
result = PyType_GenericNew(BinOp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.BinOp.left);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_left, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_operator(o->v.BinOp.op);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_op, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.BinOp.right);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_right, value) == -1)
goto failed;
Py_DECREF(value);
break;
case UnaryOp_kind:
result = PyType_GenericNew(UnaryOp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_unaryop(o->v.UnaryOp.op);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_op, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.UnaryOp.operand);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_operand, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Lambda_kind:
result = PyType_GenericNew(Lambda_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_arguments(o->v.Lambda.args);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_args, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.Lambda.body);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
break;
case IfExp_kind:
result = PyType_GenericNew(IfExp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.IfExp.test);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_test, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.IfExp.body);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.IfExp.orelse);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_orelse, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Dict_kind:
result = PyType_GenericNew(Dict_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Dict.keys, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_keys, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.Dict.values, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_values, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Set_kind:
result = PyType_GenericNew(Set_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Set.elts, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_elts, value) == -1)
goto failed;
Py_DECREF(value);
break;
case ListComp_kind:
result = PyType_GenericNew(ListComp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.ListComp.elt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_elt, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.ListComp.generators, ast2obj_comprehension);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_generators, value) == -1)
goto failed;
Py_DECREF(value);
break;
case SetComp_kind:
result = PyType_GenericNew(SetComp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.SetComp.elt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_elt, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.SetComp.generators, ast2obj_comprehension);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_generators, value) == -1)
goto failed;
Py_DECREF(value);
break;
case DictComp_kind:
result = PyType_GenericNew(DictComp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.DictComp.key);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_key, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.DictComp.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.DictComp.generators, ast2obj_comprehension);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_generators, value) == -1)
goto failed;
Py_DECREF(value);
break;
case GeneratorExp_kind:
result = PyType_GenericNew(GeneratorExp_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.GeneratorExp.elt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_elt, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.GeneratorExp.generators,
ast2obj_comprehension);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_generators, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Await_kind:
result = PyType_GenericNew(Await_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Await.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Yield_kind:
result = PyType_GenericNew(Yield_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Yield.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case YieldFrom_kind:
result = PyType_GenericNew(YieldFrom_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.YieldFrom.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Compare_kind:
result = PyType_GenericNew(Compare_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Compare.left);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_left, value) == -1)
goto failed;
Py_DECREF(value);
{
Py_ssize_t i, n = asdl_seq_LEN(o->v.Compare.ops);
value = PyList_New(n);
if (!value) goto failed;
for(i = 0; i < n; i++)
PyList_SET_ITEM(value, i, ast2obj_cmpop((cmpop_ty)asdl_seq_GET(o->v.Compare.ops, i)));
}
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_ops, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.Compare.comparators, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_comparators, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Call_kind:
result = PyType_GenericNew(Call_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Call.func);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_func, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.Call.args, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_args, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.Call.keywords, ast2obj_keyword);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_keywords, value) == -1)
goto failed;
Py_DECREF(value);
break;
case FormattedValue_kind:
result = PyType_GenericNew(FormattedValue_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.FormattedValue.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->v.FormattedValue.conversion);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_conversion, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.FormattedValue.format_spec);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_format_spec, value) == -1)
goto failed;
Py_DECREF(value);
break;
case JoinedStr_kind:
result = PyType_GenericNew(JoinedStr_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.JoinedStr.values, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_values, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Constant_kind:
result = PyType_GenericNew(Constant_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_constant(o->v.Constant.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Attribute_kind:
result = PyType_GenericNew(Attribute_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Attribute.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_identifier(o->v.Attribute.attr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_attr, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr_context(o->v.Attribute.ctx);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_ctx, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Subscript_kind:
result = PyType_GenericNew(Subscript_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Subscript.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_slice(o->v.Subscript.slice);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_slice, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr_context(o->v.Subscript.ctx);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_ctx, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Starred_kind:
result = PyType_GenericNew(Starred_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Starred.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr_context(o->v.Starred.ctx);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_ctx, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Name_kind:
result = PyType_GenericNew(Name_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_identifier(o->v.Name.id);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_id, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr_context(o->v.Name.ctx);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_ctx, value) == -1)
goto failed;
Py_DECREF(value);
break;
case List_kind:
result = PyType_GenericNew(List_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.List.elts, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_elts, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr_context(o->v.List.ctx);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_ctx, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Tuple_kind:
result = PyType_GenericNew(Tuple_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.Tuple.elts, ast2obj_expr);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_elts, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr_context(o->v.Tuple.ctx);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_ctx, value) == -1)
goto failed;
Py_DECREF(value);
break;
}
value = ast2obj_int(o->lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
value = ast2obj_int(o->end_lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->end_col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
return result;
failed:
Py_XDECREF(value);
Py_XDECREF(result);
return NULL;
}
/* Map an expr_context_ty value to the matching Python singleton.
   Returns a new reference, or NULL with SystemError set when the value is
   not one of the contexts handled below. */
PyObject* ast2obj_expr_context(expr_context_ty o)
{
    PyObject *singleton;

    switch(o) {
        case Load:
            singleton = Load_singleton;
            break;
        case Store:
            singleton = Store_singleton;
            break;
        case Del:
            singleton = Del_singleton;
            break;
        case AugLoad:
            singleton = AugLoad_singleton;
            break;
        case AugStore:
            singleton = AugStore_singleton;
            break;
        case Param:
            singleton = Param_singleton;
            break;
        default:
            /* should never happen, but just in case ... */
            PyErr_Format(PyExc_SystemError, "unknown expr_context found");
            return NULL;
    }
    Py_INCREF(singleton);
    return singleton;
}
PyObject*
ast2obj_slice(void* _o)
{
slice_ty o = (slice_ty)_o;
PyObject *result = NULL, *value = NULL;
if (!o) {
Py_RETURN_NONE;
}
switch (o->kind) {
case Slice_kind:
result = PyType_GenericNew(Slice_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Slice.lower);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_lower, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.Slice.upper);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_upper, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.Slice.step);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_step, value) == -1)
goto failed;
Py_DECREF(value);
break;
case ExtSlice_kind:
result = PyType_GenericNew(ExtSlice_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_list(o->v.ExtSlice.dims, ast2obj_slice);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_dims, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Index_kind:
result = PyType_GenericNew(Index_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.Index.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
}
return result;
failed:
Py_XDECREF(value);
Py_XDECREF(result);
return NULL;
}
/* Map a boolop_ty value to the matching Python singleton.
   Returns a new reference, or NULL with SystemError set when the value is
   neither And nor Or. */
PyObject* ast2obj_boolop(boolop_ty o)
{
    PyObject *singleton;

    switch(o) {
        case And:
            singleton = And_singleton;
            break;
        case Or:
            singleton = Or_singleton;
            break;
        default:
            /* should never happen, but just in case ... */
            PyErr_Format(PyExc_SystemError, "unknown boolop found");
            return NULL;
    }
    Py_INCREF(singleton);
    return singleton;
}
/* Map an operator_ty value to the matching Python singleton.
   Returns a new reference, or NULL with SystemError set when the value is
   not one of the operators handled below. */
PyObject* ast2obj_operator(operator_ty o)
{
    PyObject *singleton;

    switch(o) {
        case Add:
            singleton = Add_singleton;
            break;
        case Sub:
            singleton = Sub_singleton;
            break;
        case Mult:
            singleton = Mult_singleton;
            break;
        case MatMult:
            singleton = MatMult_singleton;
            break;
        case Div:
            singleton = Div_singleton;
            break;
        case Mod:
            singleton = Mod_singleton;
            break;
        case Pow:
            singleton = Pow_singleton;
            break;
        case LShift:
            singleton = LShift_singleton;
            break;
        case RShift:
            singleton = RShift_singleton;
            break;
        case BitOr:
            singleton = BitOr_singleton;
            break;
        case BitXor:
            singleton = BitXor_singleton;
            break;
        case BitAnd:
            singleton = BitAnd_singleton;
            break;
        case FloorDiv:
            singleton = FloorDiv_singleton;
            break;
        default:
            /* should never happen, but just in case ... */
            PyErr_Format(PyExc_SystemError, "unknown operator found");
            return NULL;
    }
    Py_INCREF(singleton);
    return singleton;
}
/* Map a unaryop_ty value to the matching Python singleton.
   Returns a new reference, or NULL with SystemError set when the value is
   not one of the operators handled below. */
PyObject* ast2obj_unaryop(unaryop_ty o)
{
    PyObject *singleton;

    switch(o) {
        case Invert:
            singleton = Invert_singleton;
            break;
        case Not:
            singleton = Not_singleton;
            break;
        case UAdd:
            singleton = UAdd_singleton;
            break;
        case USub:
            singleton = USub_singleton;
            break;
        default:
            /* should never happen, but just in case ... */
            PyErr_Format(PyExc_SystemError, "unknown unaryop found");
            return NULL;
    }
    Py_INCREF(singleton);
    return singleton;
}
/* Map a cmpop_ty value to the matching Python singleton.
   Returns a new reference, or NULL with SystemError set when the value is
   not one of the comparison operators handled below. */
PyObject* ast2obj_cmpop(cmpop_ty o)
{
    PyObject *singleton;

    switch(o) {
        case Eq:
            singleton = Eq_singleton;
            break;
        case NotEq:
            singleton = NotEq_singleton;
            break;
        case Lt:
            singleton = Lt_singleton;
            break;
        case LtE:
            singleton = LtE_singleton;
            break;
        case Gt:
            singleton = Gt_singleton;
            break;
        case GtE:
            singleton = GtE_singleton;
            break;
        case Is:
            singleton = Is_singleton;
            break;
        case IsNot:
            singleton = IsNot_singleton;
            break;
        case In:
            singleton = In_singleton;
            break;
        case NotIn:
            singleton = NotIn_singleton;
            break;
        default:
            /* should never happen, but just in case ... */
            PyErr_Format(PyExc_SystemError, "unknown cmpop found");
            return NULL;
    }
    Py_INCREF(singleton);
    return singleton;
}
/* Convert a C comprehension node (comprehension_ty, passed as void*) into
   its Python AST object.

   A NULL node converts to None.  Otherwise a comprehension instance is
   created and its target, iter, ifs and is_async attributes are filled in
   from the node.

   Returns a new reference, or NULL on failure. */
PyObject*
ast2obj_comprehension(void* _o)
{
    comprehension_ty o = (comprehension_ty)_o;
    PyObject *result = NULL, *value = NULL;

    if (!o) {
        Py_RETURN_NONE;
    }

    result = PyType_GenericNew(comprehension_type, NULL, NULL);
    if (result == NULL)
        return NULL;

    value = ast2obj_expr(o->target);
    if (value == NULL
        || _PyObject_SetAttrId(result, &PyId_target, value) == -1)
        goto failed;
    Py_DECREF(value);

    value = ast2obj_expr(o->iter);
    if (value == NULL
        || _PyObject_SetAttrId(result, &PyId_iter, value) == -1)
        goto failed;
    Py_DECREF(value);

    value = ast2obj_list(o->ifs, ast2obj_expr);
    if (value == NULL
        || _PyObject_SetAttrId(result, &PyId_ifs, value) == -1)
        goto failed;
    Py_DECREF(value);

    value = ast2obj_int(o->is_async);
    if (value == NULL
        || _PyObject_SetAttrId(result, &PyId_is_async, value) == -1)
        goto failed;
    Py_DECREF(value);

    return result;

failed:
    /* `value` is either NULL or a reference that has not been released. */
    Py_XDECREF(value);
    Py_XDECREF(result);
    return NULL;
}
PyObject*
ast2obj_excepthandler(void* _o)
{
excepthandler_ty o = (excepthandler_ty)_o;
PyObject *result = NULL, *value = NULL;
if (!o) {
Py_RETURN_NONE;
}
switch (o->kind) {
case ExceptHandler_kind:
result = PyType_GenericNew(ExceptHandler_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_expr(o->v.ExceptHandler.type);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_identifier(o->v.ExceptHandler.name);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_name, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_list(o->v.ExceptHandler.body, ast2obj_stmt);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_body, value) == -1)
goto failed;
Py_DECREF(value);
break;
}
value = ast2obj_int(o->lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
value = ast2obj_int(o->end_lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->end_col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
return result;
failed:
Py_XDECREF(value);
Py_XDECREF(result);
return NULL;
}
/*
 * Build the Python object for a C-level `arguments` node.
 *
 * A NULL node is mapped to None.  Otherwise a new instance of
 * arguments_type is created and populated, in this order, with:
 * args, vararg, kwonlyargs, kw_defaults, kwarg, defaults.
 *
 * Returns the populated object, or NULL if instance creation, a field
 * conversion, or an attribute store fails.
 */
PyObject*
ast2obj_arguments(void* _o)
{
    arguments_ty node = (arguments_ty)_o;
    PyObject *py_node = NULL;
    PyObject *field = NULL;

    if (node == NULL) {
        Py_RETURN_NONE;
    }

    py_node = PyType_GenericNew(arguments_type, NULL, NULL);
    if (py_node == NULL) {
        return NULL;
    }

    /* Each step: convert the field, store it on py_node, then drop our
     * own reference to the converted value. */
    field = ast2obj_list(node->args, ast2obj_arg);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_args, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_arg(node->vararg);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_vararg, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_list(node->kwonlyargs, ast2obj_arg);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_kwonlyargs, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_list(node->kw_defaults, ast2obj_expr);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_kw_defaults, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_arg(node->kwarg);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_kwarg, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_list(node->defaults, ast2obj_expr);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_defaults, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    return py_node;

error:
    /* field is either NULL (conversion failed) or still owned by us
     * (attribute store failed). */
    Py_XDECREF(field);
    Py_XDECREF(py_node);
    return NULL;
}
PyObject*
ast2obj_arg(void* _o)
{
arg_ty o = (arg_ty)_o;
PyObject *result = NULL, *value = NULL;
if (!o) {
Py_RETURN_NONE;
}
result = PyType_GenericNew(arg_type, NULL, NULL);
if (!result) return NULL;
value = ast2obj_identifier(o->arg);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_arg, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->annotation);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_annotation, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_string(o->type_comment);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_type_comment, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
value = ast2obj_int(o->end_lineno);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_lineno, value) < 0)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->end_col_offset);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_end_col_offset, value) < 0)
goto failed;
Py_DECREF(value);
return result;
failed:
Py_XDECREF(value);
Py_XDECREF(result);
return NULL;
}
/*
 * Build the Python object for a C-level `keyword` node.
 *
 * A NULL node is mapped to None.  Otherwise a new instance of
 * keyword_type is created and its `arg` and `value` attributes are set,
 * in that order.
 *
 * Returns the populated object, or NULL if instance creation, a field
 * conversion, or an attribute store fails.
 */
PyObject*
ast2obj_keyword(void* _o)
{
    keyword_ty node = (keyword_ty)_o;
    PyObject *py_node = NULL;
    PyObject *field = NULL;

    if (node == NULL) {
        Py_RETURN_NONE;
    }

    py_node = PyType_GenericNew(keyword_type, NULL, NULL);
    if (py_node == NULL) {
        return NULL;
    }

    field = ast2obj_identifier(node->arg);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_arg, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_expr(node->value);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_value, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    return py_node;

error:
    Py_XDECREF(field);
    Py_XDECREF(py_node);
    return NULL;
}
/*
 * Build the Python object for a C-level `alias` node.
 *
 * A NULL node is mapped to None.  Otherwise a new instance of alias_type
 * is created and its `name` and `asname` attributes are set, in that
 * order.
 *
 * Returns the populated object, or NULL if instance creation, a field
 * conversion, or an attribute store fails.
 */
PyObject*
ast2obj_alias(void* _o)
{
    alias_ty node = (alias_ty)_o;
    PyObject *py_node = NULL;
    PyObject *field = NULL;

    if (node == NULL) {
        Py_RETURN_NONE;
    }

    py_node = PyType_GenericNew(alias_type, NULL, NULL);
    if (py_node == NULL) {
        return NULL;
    }

    field = ast2obj_identifier(node->name);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_name, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_identifier(node->asname);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_asname, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    return py_node;

error:
    Py_XDECREF(field);
    Py_XDECREF(py_node);
    return NULL;
}
/*
 * Build the Python object for a C-level `withitem` node.
 *
 * A NULL node is mapped to None.  Otherwise a new instance of
 * withitem_type is created and its `context_expr` and `optional_vars`
 * attributes are set, in that order.
 *
 * Returns the populated object, or NULL if instance creation, a field
 * conversion, or an attribute store fails.
 */
PyObject*
ast2obj_withitem(void* _o)
{
    withitem_ty node = (withitem_ty)_o;
    PyObject *py_node = NULL;
    PyObject *field = NULL;

    if (node == NULL) {
        Py_RETURN_NONE;
    }

    py_node = PyType_GenericNew(withitem_type, NULL, NULL);
    if (py_node == NULL) {
        return NULL;
    }

    field = ast2obj_expr(node->context_expr);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_context_expr, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    field = ast2obj_expr(node->optional_vars);
    if (field == NULL
        || _PyObject_SetAttrId(py_node, &PyId_optional_vars, field) == -1) {
        goto error;
    }
    Py_DECREF(field);

    return py_node;

error:
    Py_XDECREF(field);
    Py_XDECREF(py_node);
    return NULL;
}
/*
 * Build the Python object for a C-level `type_ignore` node.
 *
 * A NULL node is mapped to None.  For a node of kind TypeIgnore_kind a
 * new TypeIgnore_type instance is created and its `lineno` attribute is
 * set.  For any other kind nothing is created and NULL is returned
 * (this function does not set an exception in that case).
 *
 * Returns the populated object, or NULL on failure.
 */
PyObject*
ast2obj_type_ignore(void* _o)
{
    type_ignore_ty node = (type_ignore_ty)_o;
    PyObject *py_node = NULL;
    PyObject *field = NULL;

    if (node == NULL) {
        Py_RETURN_NONE;
    }

    if (node->kind == TypeIgnore_kind) {
        py_node = PyType_GenericNew(TypeIgnore_type, NULL, NULL);
        if (py_node == NULL) {
            goto error;
        }

        field = ast2obj_int(node->v.TypeIgnore.lineno);
        if (field == NULL
            || _PyObject_SetAttrId(py_node, &PyId_lineno, field) == -1) {
            goto error;
        }
        Py_DECREF(field);
    }

    return py_node;

error:
    Py_XDECREF(field);
    Py_XDECREF(py_node);
    return NULL;
}
/*
 * Convert a Python-level `mod` object into its C AST representation.
 *
 * obj   - the Python object to convert.  None is accepted and yields
 *         *out = NULL with a success return.
 * out   - receives the node built by the matching constructor.
 * arena - passed through to the sequence allocator, the child
 *         converters and the node constructors.
 *
 * The object is tested, in order, against Module, Interactive,
 * Expression, FunctionType and Suite.  The first match has its required
 * fields looked up and converted; a missing required field or a
 * non-list value for a sequence field raises TypeError, and a list whose
 * length changes while it is being converted raises RuntimeError.  If no
 * type matches, TypeError is raised.
 *
 * Returns 0 on success and 1 on failure.
 *
 * Reference handling: PyList_GET_ITEM() yields a borrowed reference, and
 * the child converters perform attribute lookups and isinstance checks
 * on the element, which may execute arbitrary Python code.  Such code
 * can replace the list slot without changing the list's length (so the
 * size check does not notice), releasing the element while it is still
 * in use.  Every list loop therefore holds its own strong reference to
 * the element for the duration of the conversion call.
 */
int
obj2ast_mod(PyObject* obj, mod_ty* out, PyArena* arena)
{
    int isinstance;

    PyObject *tmp = NULL;

    if (obj == Py_None) {
        *out = NULL;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)Module_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        asdl_seq* body;
        asdl_seq* type_ignores;

        if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from Module");
            return 1;
        }
        else {
            int res;
            Py_ssize_t len;
            Py_ssize_t i;
            if (!PyList_Check(tmp)) {
                PyErr_Format(PyExc_TypeError, "Module field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
                goto failed;
            }
            len = PyList_GET_SIZE(tmp);
            body = _Py_asdl_seq_new(len, arena);
            if (body == NULL) goto failed;
            for (i = 0; i < len; i++) {
                stmt_ty val;
                /* Keep the element alive across the conversion call. */
                PyObject *item = PyList_GET_ITEM(tmp, i);
                Py_INCREF(item);
                res = obj2ast_stmt(item, &val, arena);
                Py_DECREF(item);
                if (res != 0) goto failed;
                if (len != PyList_GET_SIZE(tmp)) {
                    PyErr_SetString(PyExc_RuntimeError, "Module field \"body\" changed size during iteration");
                    goto failed;
                }
                asdl_seq_SET(body, i, val);
            }
            Py_CLEAR(tmp);
        }
        if (_PyObject_LookupAttrId(obj, &PyId_type_ignores, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"type_ignores\" missing from Module");
            return 1;
        }
        else {
            int res;
            Py_ssize_t len;
            Py_ssize_t i;
            if (!PyList_Check(tmp)) {
                PyErr_Format(PyExc_TypeError, "Module field \"type_ignores\" must be a list, not a %.200s", tmp->ob_type->tp_name);
                goto failed;
            }
            len = PyList_GET_SIZE(tmp);
            type_ignores = _Py_asdl_seq_new(len, arena);
            if (type_ignores == NULL) goto failed;
            for (i = 0; i < len; i++) {
                type_ignore_ty val;
                /* Keep the element alive across the conversion call. */
                PyObject *item = PyList_GET_ITEM(tmp, i);
                Py_INCREF(item);
                res = obj2ast_type_ignore(item, &val, arena);
                Py_DECREF(item);
                if (res != 0) goto failed;
                if (len != PyList_GET_SIZE(tmp)) {
                    PyErr_SetString(PyExc_RuntimeError, "Module field \"type_ignores\" changed size during iteration");
                    goto failed;
                }
                asdl_seq_SET(type_ignores, i, val);
            }
            Py_CLEAR(tmp);
        }
        *out = Module(body, type_ignores, arena);
        if (*out == NULL) goto failed;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)Interactive_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        asdl_seq* body;

        if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from Interactive");
            return 1;
        }
        else {
            int res;
            Py_ssize_t len;
            Py_ssize_t i;
            if (!PyList_Check(tmp)) {
                PyErr_Format(PyExc_TypeError, "Interactive field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
                goto failed;
            }
            len = PyList_GET_SIZE(tmp);
            body = _Py_asdl_seq_new(len, arena);
            if (body == NULL) goto failed;
            for (i = 0; i < len; i++) {
                stmt_ty val;
                /* Keep the element alive across the conversion call. */
                PyObject *item = PyList_GET_ITEM(tmp, i);
                Py_INCREF(item);
                res = obj2ast_stmt(item, &val, arena);
                Py_DECREF(item);
                if (res != 0) goto failed;
                if (len != PyList_GET_SIZE(tmp)) {
                    PyErr_SetString(PyExc_RuntimeError, "Interactive field \"body\" changed size during iteration");
                    goto failed;
                }
                asdl_seq_SET(body, i, val);
            }
            Py_CLEAR(tmp);
        }
        *out = Interactive(body, arena);
        if (*out == NULL) goto failed;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)Expression_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        expr_ty body;

        if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from Expression");
            return 1;
        }
        else {
            int res;
            /* tmp is a strong reference here, so no extra INCREF is needed. */
            res = obj2ast_expr(tmp, &body, arena);
            if (res != 0) goto failed;
            Py_CLEAR(tmp);
        }
        *out = Expression(body, arena);
        if (*out == NULL) goto failed;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)FunctionType_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        asdl_seq* argtypes;
        expr_ty returns;

        if (_PyObject_LookupAttrId(obj, &PyId_argtypes, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"argtypes\" missing from FunctionType");
            return 1;
        }
        else {
            int res;
            Py_ssize_t len;
            Py_ssize_t i;
            if (!PyList_Check(tmp)) {
                PyErr_Format(PyExc_TypeError, "FunctionType field \"argtypes\" must be a list, not a %.200s", tmp->ob_type->tp_name);
                goto failed;
            }
            len = PyList_GET_SIZE(tmp);
            argtypes = _Py_asdl_seq_new(len, arena);
            if (argtypes == NULL) goto failed;
            for (i = 0; i < len; i++) {
                expr_ty val;
                /* Keep the element alive across the conversion call. */
                PyObject *item = PyList_GET_ITEM(tmp, i);
                Py_INCREF(item);
                res = obj2ast_expr(item, &val, arena);
                Py_DECREF(item);
                if (res != 0) goto failed;
                if (len != PyList_GET_SIZE(tmp)) {
                    PyErr_SetString(PyExc_RuntimeError, "FunctionType field \"argtypes\" changed size during iteration");
                    goto failed;
                }
                asdl_seq_SET(argtypes, i, val);
            }
            Py_CLEAR(tmp);
        }
        if (_PyObject_LookupAttrId(obj, &PyId_returns, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"returns\" missing from FunctionType");
            return 1;
        }
        else {
            int res;
            res = obj2ast_expr(tmp, &returns, arena);
            if (res != 0) goto failed;
            Py_CLEAR(tmp);
        }
        *out = FunctionType(argtypes, returns, arena);
        if (*out == NULL) goto failed;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)Suite_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        asdl_seq* body;

        if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from Suite");
            return 1;
        }
        else {
            int res;
            Py_ssize_t len;
            Py_ssize_t i;
            if (!PyList_Check(tmp)) {
                PyErr_Format(PyExc_TypeError, "Suite field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
                goto failed;
            }
            len = PyList_GET_SIZE(tmp);
            body = _Py_asdl_seq_new(len, arena);
            if (body == NULL) goto failed;
            for (i = 0; i < len; i++) {
                stmt_ty val;
                /* Keep the element alive across the conversion call. */
                PyObject *item = PyList_GET_ITEM(tmp, i);
                Py_INCREF(item);
                res = obj2ast_stmt(item, &val, arena);
                Py_DECREF(item);
                if (res != 0) goto failed;
                if (len != PyList_GET_SIZE(tmp)) {
                    PyErr_SetString(PyExc_RuntimeError, "Suite field \"body\" changed size during iteration");
                    goto failed;
                }
                asdl_seq_SET(body, i, val);
            }
            Py_CLEAR(tmp);
        }
        *out = Suite(body, arena);
        if (*out == NULL) goto failed;
        return 0;
    }

    /* No mod subtype matched; fall through into the shared cleanup. */
    PyErr_Format(PyExc_TypeError, "expected some sort of mod, but got %R", obj);
    failed:
    Py_XDECREF(tmp);
    return 1;
}
int
obj2ast_stmt(PyObject* obj, stmt_ty* out, PyArena* arena)
{
int isinstance;
PyObject *tmp = NULL;
int lineno;
int col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int end_lineno;
int end_col_offset;
if (obj == Py_None) {
*out = NULL;
return 0;
}
if (_PyObject_LookupAttrId(obj, &PyId_lineno, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"lineno\" missing from stmt");
return 1;
}
else {
int res;
res = obj2ast_int(tmp, &lineno, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_col_offset, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"col_offset\" missing from stmt");
return 1;
}
else {
int res;
res = obj2ast_int(tmp, &col_offset, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
if (_PyObject_LookupAttrId(obj, &PyId_end_lineno, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
end_lineno = 0;
}
else {
int res;
res = obj2ast_int(tmp, &end_lineno, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_end_col_offset, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
end_col_offset = 0;
}
else {
int res;
res = obj2ast_int(tmp, &end_col_offset, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
isinstance = PyObject_IsInstance(obj, (PyObject*)FunctionDef_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
identifier name;
arguments_ty args;
asdl_seq* body;
asdl_seq* decorator_list;
expr_ty returns;
string type_comment;
if (_PyObject_LookupAttrId(obj, &PyId_name, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"name\" missing from FunctionDef");
return 1;
}
else {
int res;
res = obj2ast_identifier(tmp, &name, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_args, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"args\" missing from FunctionDef");
return 1;
}
else {
int res;
res = obj2ast_arguments(tmp, &args, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from FunctionDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "FunctionDef field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "FunctionDef field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_decorator_list, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"decorator_list\" missing from FunctionDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "FunctionDef field \"decorator_list\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
decorator_list = _Py_asdl_seq_new(len, arena);
if (decorator_list == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "FunctionDef field \"decorator_list\" changed size during iteration");
goto failed;
}
asdl_seq_SET(decorator_list, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_returns, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
returns = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &returns, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type_comment = NULL;
}
else {
int res;
res = obj2ast_string(tmp, &type_comment, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = FunctionDef(name, args, body, decorator_list, returns,
type_comment, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)AsyncFunctionDef_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
identifier name;
arguments_ty args;
asdl_seq* body;
asdl_seq* decorator_list;
expr_ty returns;
string type_comment;
if (_PyObject_LookupAttrId(obj, &PyId_name, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"name\" missing from AsyncFunctionDef");
return 1;
}
else {
int res;
res = obj2ast_identifier(tmp, &name, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_args, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"args\" missing from AsyncFunctionDef");
return 1;
}
else {
int res;
res = obj2ast_arguments(tmp, &args, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from AsyncFunctionDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "AsyncFunctionDef field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "AsyncFunctionDef field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_decorator_list, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"decorator_list\" missing from AsyncFunctionDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "AsyncFunctionDef field \"decorator_list\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
decorator_list = _Py_asdl_seq_new(len, arena);
if (decorator_list == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "AsyncFunctionDef field \"decorator_list\" changed size during iteration");
goto failed;
}
asdl_seq_SET(decorator_list, i, val);
}
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_returns, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
returns = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &returns, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type_comment = NULL;
}
else {
int res;
res = obj2ast_string(tmp, &type_comment, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = AsyncFunctionDef(name, args, body, decorator_list, returns,
type_comment, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)ClassDef_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
identifier name;
asdl_seq* bases;
asdl_seq* keywords;
asdl_seq* body;
asdl_seq* decorator_list;
if (_PyObject_LookupAttrId(obj, &PyId_name, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"name\" missing from ClassDef");
return 1;
}
else {
int res;
res = obj2ast_identifier(tmp, &name, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_bases, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"bases\" missing from ClassDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "ClassDef field \"bases\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
bases = _Py_asdl_seq_new(len, arena);
if (bases == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "ClassDef field \"bases\" changed size during iteration");
goto failed;
}
asdl_seq_SET(bases, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_keywords, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"keywords\" missing from ClassDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "ClassDef field \"keywords\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
keywords = _Py_asdl_seq_new(len, arena);
if (keywords == NULL) goto failed;
for (i = 0; i < len; i++) {
keyword_ty val;
res = obj2ast_keyword(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "ClassDef field \"keywords\" changed size during iteration");
goto failed;
}
asdl_seq_SET(keywords, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from ClassDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "ClassDef field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "ClassDef field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_decorator_list, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"decorator_list\" missing from ClassDef");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "ClassDef field \"decorator_list\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
decorator_list = _Py_asdl_seq_new(len, arena);
if (decorator_list == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "ClassDef field \"decorator_list\" changed size during iteration");
goto failed;
}
asdl_seq_SET(decorator_list, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
*out = ClassDef(name, bases, keywords, body, decorator_list, lineno,
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Return_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
value = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Return(value, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Delete_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* targets;
if (_PyObject_LookupAttrId(obj, &PyId_targets, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"targets\" missing from Delete");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Delete field \"targets\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
targets = _Py_asdl_seq_new(len, arena);
if (targets == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Delete field \"targets\" changed size during iteration");
goto failed;
}
asdl_seq_SET(targets, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly", because some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass
  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing; for example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Delete(targets, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Assign_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* targets;
expr_ty value;
string type_comment;
if (_PyObject_LookupAttrId(obj, &PyId_targets, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"targets\" missing from Assign");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Assign field \"targets\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
targets = _Py_asdl_seq_new(len, arena);
if (targets == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Assign field \"targets\" changed size during iteration");
goto failed;
}
asdl_seq_SET(targets, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Assign");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type_comment = NULL;
}
else {
int res;
res = obj2ast_string(tmp, &type_comment, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = Assign(targets, value, type_comment, lineno, col_offset,
end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)AugAssign_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty target;
operator_ty op;
expr_ty value;
if (_PyObject_LookupAttrId(obj, &PyId_target, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"target\" missing from AugAssign");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &target, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_op, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"op\" missing from AugAssign");
return 1;
}
else {
int res;
res = obj2ast_operator(tmp, &op, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from AugAssign");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly", because some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass
  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing; for example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = AugAssign(target, op, value, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)AnnAssign_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty target;
expr_ty annotation;
expr_ty value;
int simple;
if (_PyObject_LookupAttrId(obj, &PyId_target, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"target\" missing from AnnAssign");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &target, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_annotation, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"annotation\" missing from AnnAssign");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &annotation, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
value = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_simple, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"simple\" missing from AnnAssign");
return 1;
}
else {
int res;
res = obj2ast_int(tmp, &simple, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = AnnAssign(target, annotation, value, simple, lineno, col_offset,
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly", because some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass
  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing; for example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)For_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty target;
expr_ty iter;
asdl_seq* body;
asdl_seq* orelse;
string type_comment;
if (_PyObject_LookupAttrId(obj, &PyId_target, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"target\" missing from For");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &target, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_iter, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"iter\" missing from For");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &iter, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from For");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "For field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "For field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_orelse, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"orelse\" missing from For");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "For field \"orelse\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
orelse = _Py_asdl_seq_new(len, arena);
if (orelse == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "For field \"orelse\" changed size during iteration");
goto failed;
}
asdl_seq_SET(orelse, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type_comment = NULL;
}
else {
int res;
res = obj2ast_string(tmp, &type_comment, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = For(target, iter, body, orelse, type_comment, lineno,
col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)AsyncFor_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty target;
expr_ty iter;
asdl_seq* body;
asdl_seq* orelse;
string type_comment;
if (_PyObject_LookupAttrId(obj, &PyId_target, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"target\" missing from AsyncFor");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &target, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_iter, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"iter\" missing from AsyncFor");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &iter, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from AsyncFor");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "AsyncFor field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "AsyncFor field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_orelse, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"orelse\" missing from AsyncFor");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "AsyncFor field \"orelse\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
orelse = _Py_asdl_seq_new(len, arena);
if (orelse == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "AsyncFor field \"orelse\" changed size during iteration");
goto failed;
}
asdl_seq_SET(orelse, i, val);
}
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type_comment = NULL;
}
else {
int res;
res = obj2ast_string(tmp, &type_comment, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = AsyncFor(target, iter, body, orelse, type_comment, lineno,
col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)While_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty test;
asdl_seq* body;
asdl_seq* orelse;
if (_PyObject_LookupAttrId(obj, &PyId_test, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"test\" missing from While");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &test, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from While");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "While field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "While field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_orelse, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"orelse\" missing from While");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "While field \"orelse\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
orelse = _Py_asdl_seq_new(len, arena);
if (orelse == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "While field \"orelse\" changed size during iteration");
goto failed;
}
asdl_seq_SET(orelse, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly", because some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass
  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing; for example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = While(test, body, orelse, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)If_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty test;
asdl_seq* body;
asdl_seq* orelse;
if (_PyObject_LookupAttrId(obj, &PyId_test, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"test\" missing from If");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &test, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from If");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "If field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "If field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_orelse, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"orelse\" missing from If");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "If field \"orelse\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
orelse = _Py_asdl_seq_new(len, arena);
if (orelse == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "If field \"orelse\" changed size during iteration");
goto failed;
}
asdl_seq_SET(orelse, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly", because some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass
  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing; for example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = If(test, body, orelse, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)With_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* items;
asdl_seq* body;
string type_comment;
if (_PyObject_LookupAttrId(obj, &PyId_items, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"items\" missing from With");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "With field \"items\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
items = _Py_asdl_seq_new(len, arena);
if (items == NULL) goto failed;
for (i = 0; i < len; i++) {
withitem_ty val;
res = obj2ast_withitem(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "With field \"items\" changed size during iteration");
goto failed;
}
asdl_seq_SET(items, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from With");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "With field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "With field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type_comment = NULL;
}
else {
int res;
res = obj2ast_string(tmp, &type_comment, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = With(items, body, type_comment, lineno, col_offset, end_lineno,
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly", because some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass
  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing; for example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)AsyncWith_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* items;
asdl_seq* body;
string type_comment;
if (_PyObject_LookupAttrId(obj, &PyId_items, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"items\" missing from AsyncWith");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "AsyncWith field \"items\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
items = _Py_asdl_seq_new(len, arena);
if (items == NULL) goto failed;
for (i = 0; i < len; i++) {
withitem_ty val;
res = obj2ast_withitem(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "AsyncWith field \"items\" changed size during iteration");
goto failed;
}
asdl_seq_SET(items, i, val);
}
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from AsyncWith");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "AsyncWith field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "AsyncWith field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type_comment = NULL;
}
else {
int res;
res = obj2ast_string(tmp, &type_comment, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = AsyncWith(items, body, type_comment, lineno, col_offset,
end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Raise_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty exc;
expr_ty cause;
if (_PyObject_LookupAttrId(obj, &PyId_exc, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
exc = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &exc, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_cause, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
cause = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &cause, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly", because some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass
  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing; for example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Raise(exc, cause, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Try_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* body;
asdl_seq* handlers;
asdl_seq* orelse;
asdl_seq* finalbody;
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from Try");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Try field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Try field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_handlers, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"handlers\" missing from Try");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Try field \"handlers\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
handlers = _Py_asdl_seq_new(len, arena);
if (handlers == NULL) goto failed;
for (i = 0; i < len; i++) {
excepthandler_ty val;
res = obj2ast_excepthandler(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Try field \"handlers\" changed size during iteration");
goto failed;
}
asdl_seq_SET(handlers, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_orelse, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"orelse\" missing from Try");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Try field \"orelse\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
orelse = _Py_asdl_seq_new(len, arena);
if (orelse == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Try field \"orelse\" changed size during iteration");
goto failed;
}
asdl_seq_SET(orelse, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_finalbody, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"finalbody\" missing from Try");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Try field \"finalbody\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
finalbody = _Py_asdl_seq_new(len, arena);
if (finalbody == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Try field \"finalbody\" changed size during iteration");
goto failed;
}
asdl_seq_SET(finalbody, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
*out = Try(body, handlers, orelse, finalbody, lineno, col_offset,
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Assert_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty test;
expr_ty msg;
if (_PyObject_LookupAttrId(obj, &PyId_test, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"test\" missing from Assert");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &test, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_msg, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
msg = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &msg, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Assert(test, msg, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Import_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* names;
if (_PyObject_LookupAttrId(obj, &PyId_names, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"names\" missing from Import");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Import field \"names\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
names = _Py_asdl_seq_new(len, arena);
if (names == NULL) goto failed;
for (i = 0; i < len; i++) {
alias_ty val;
res = obj2ast_alias(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Import field \"names\" changed size during iteration");
goto failed;
}
asdl_seq_SET(names, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Import(names, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)ImportFrom_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
identifier module;
asdl_seq* names;
int level;
if (_PyObject_LookupAttrId(obj, &PyId_module, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
module = NULL;
}
else {
int res;
res = obj2ast_identifier(tmp, &module, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_names, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"names\" missing from ImportFrom");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "ImportFrom field \"names\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
names = _Py_asdl_seq_new(len, arena);
if (names == NULL) goto failed;
for (i = 0; i < len; i++) {
alias_ty val;
res = obj2ast_alias(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "ImportFrom field \"names\" changed size during iteration");
goto failed;
}
asdl_seq_SET(names, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_level, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
level = 0;
}
else {
int res;
res = obj2ast_int(tmp, &level, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = ImportFrom(module, names, level, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Global_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* names;
if (_PyObject_LookupAttrId(obj, &PyId_names, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"names\" missing from Global");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Global field \"names\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
names = _Py_asdl_seq_new(len, arena);
if (names == NULL) goto failed;
for (i = 0; i < len; i++) {
identifier val;
res = obj2ast_identifier(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Global field \"names\" changed size during iteration");
goto failed;
}
asdl_seq_SET(names, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Global(names, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Nonlocal_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* names;
if (_PyObject_LookupAttrId(obj, &PyId_names, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"names\" missing from Nonlocal");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Nonlocal field \"names\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
names = _Py_asdl_seq_new(len, arena);
if (names == NULL) goto failed;
for (i = 0; i < len; i++) {
identifier val;
res = obj2ast_identifier(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Nonlocal field \"names\" changed size during iteration");
goto failed;
}
asdl_seq_SET(names, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Nonlocal(names, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Expr_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Expr");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Expr(value, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Pass_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Pass(lineno, col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Break_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Break(lineno, col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Continue_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Continue(lineno, col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
PyErr_Format(PyExc_TypeError, "expected some sort of stmt, but got %R", obj);
failed:
Py_XDECREF(tmp);
return 1;
}
int
obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
{
int isinstance;
PyObject *tmp = NULL;
int lineno;
int col_offset;
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int end_lineno;
int end_col_offset;
if (obj == Py_None) {
*out = NULL;
return 0;
}
if (_PyObject_LookupAttrId(obj, &PyId_lineno, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"lineno\" missing from expr");
return 1;
}
else {
int res;
res = obj2ast_int(tmp, &lineno, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_col_offset, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"col_offset\" missing from expr");
return 1;
}
else {
int res;
res = obj2ast_int(tmp, &col_offset, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605)

The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are the non-trivial points:

* It is not possible to reconstruct end positions in the AST "on the fly"; some information is lost after an AST node is constructed, so we need two more attributes for every AST node: `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect). For example, in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition are (2, 8).
* For `end_col_offset` I use the common Python convention for indexing. For example, for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function, `ast.get_source_segment()`, to get the source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think, however, it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
if (_PyObject_LookupAttrId(obj, &PyId_end_lineno, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
end_lineno = 0;
}
else {
int res;
res = obj2ast_int(tmp, &end_lineno, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_end_col_offset, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
end_col_offset = 0;
}
else {
int res;
res = obj2ast_int(tmp, &end_col_offset, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
isinstance = PyObject_IsInstance(obj, (PyObject*)BoolOp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
boolop_ty op;
asdl_seq* values;
if (_PyObject_LookupAttrId(obj, &PyId_op, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"op\" missing from BoolOp");
return 1;
}
else {
int res;
res = obj2ast_boolop(tmp, &op, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_values, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"values\" missing from BoolOp");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "BoolOp field \"values\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
values = _Py_asdl_seq_new(len, arena);
if (values == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "BoolOp field \"values\" changed size during iteration");
goto failed;
}
asdl_seq_SET(values, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = BoolOp(op, values, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
bpo-35224: PEP 572 Implementation (#10497) * Add tokenization of := - Add token to Include/token.h. Add token to documentation in Doc/library/token.rst. - Run `./python Lib/token.py` to regenerate Lib/token.py. - Update Parser/tokenizer.c: add case to handle `:=`. * Add initial usage of := in grammar. * Update Python.asdl to match the grammar updates. Regenerated Include/Python-ast.h and Python/Python-ast.c * Update AST and compiler files in Python/ast.c and Python/compile.c. Basic functionality, this isn't scoped properly * Regenerate Lib/symbol.py using `./python Lib/symbol.py` * Tests - Fix failing tests in test_parser.py due to changes in token numbers for internal representation * Tests - Add simple test for := token * Tests - Add simple tests for named expressions using expr and suite * Tests - Update number of levels for nested expressions to prevent stack overflow * Update symbol table to handle NamedExpr * Update Grammar to allow assignment expressions in if statements. Regenerate Python/graminit.c accordingly using `make regen-grammar` * Tests - Add additional tests for named expressions in RoundtripLegalSyntaxTestCase, based on examples and information directly from PEP 572 Note: failing tests are currently commented out (4 out of 24 tests currently fail) * Tests - Add temporary syntax test failure tests in test_parser.py Note: There is an outstanding TODO for this -- syntax tests need to be moved to a different file (presumably test_syntax.py), but this is covering what needs to be tested at the moment, and it's more convenient to run a single test for the time being * Add support for allowing assignment expressions as function argument annotations. Uncomment tests for these cases because they all pass now! * Tests - Move existing syntax tests out of test_parser.py and into test_named_expressions.py. 
Refactor syntax tests to use unittest * Add TargetScopeError exception to extend SyntaxError Note: This simply creates the TargetScopeError exception, it is not yet used anywhere * Tests - Update tests per PEP 572 Continue refactoring test suite: The named expression test suite now checks for any invalid cases that throw exceptions (no longer limited to SyntaxErrors), assignment tests to ensure that variables are properly assigned, and scope tests to ensure that variable availability and values are correct Note: - There are still tests that are marked to skip, as they are not yet implemented - There are approximately 300 lines of the PEP that have not yet been addressed, though these may be deferred * Documentation - Small updates to XXX/todo comments - Remove XXX from child description in ast.c - Add comment with number of previously supported nested expressions for 3.7.X in test_parser.py * Fix assert in seq_for_testlist() * Cleanup - Denote "Not implemented -- No keyword args" on failing test case. Fix PEP8 error for blank lines at beginning of test classes in test_parser.py * Tests - Wrap all file opens in `with...as` to ensure files are closed * WIP: handle f(a := 1) * Tests and Cleanup - No longer skips keyword arg test. Keyword arg test now uses a simpler test case and does not rely on an external file. Remove print statements from ast.c * Tests - Refactor last remaining test case that relied on an external file to use a simpler test case without the dependency * Tests - Add better description of remaining skipped tests. Add test checking scope when using assignment expression in a function argument * Tests - Add test for nested comprehension, testing value and scope. Fix variable name in skipped comprehension scope test * Handle restriction of LHS for named expressions - can only assign to LHS of type NAME. Specifically, restrict assignment to tuples This adds an alternative set_context specifically for named expressions, set_namedexpr_context.
Thus, context is now set differently for standard assignment versus assignment for named expressions in order to handle restrictions. * Tests - Update negative test case for assigning to lambda to match new error message. Add negative test case for assigning to tuple * Tests - Reorder test cases to group invalid syntax cases and named assignment target errors * Tests - Update test case for named expression in function argument - check that result and variable are set correctly * Todo - Add todo for TargetScopeError based on Guido's comment (https://github.com/python/cpython/commit/2b3acd37bdfc2d35e5094228c6684050d2aa8b0a#r30472562) * Tests - Add named expression tests for assignment operator in function arguments Note: One of two tests are skipped, as function arguments are currently treating an assignment expression inside of parenthesis as one child, which does not properly catch the named expression, nor does it count arguments properly * Add NamedStore to expr_context. Regenerate related code with `make regen-ast` * Add usage of NamedStore to ast_for_named_expr in ast.c. Update occurrences of checking for Store to also handle NamedStore where appropriate * Add ste_comprehension to _symtable_entry to track if the namespace is a comprehension. Initialize ste_comprehension to 0. Set set_comprehension to 1 in symtable_handle_comprehension * s/symtable_add_def/symtable_add_def_helper. Add symtable_add_def to handle grabbing st->st_cur and passing it to symtable_add_def_helper. This now allows us to call the original code from symtable_add_def by instead calling symtable_add_def_helper with a different ste. * Refactor symtable_record_directive to take lineno and col_offset as arguments instead of stmt_ty. This allows symtable_record_directive to be used for stmt_ty and expr_ty * Handle elevating scope for named expressions in comprehensions. * Handle error for usage of named expression inside a class block * Tests - No longer skip scope tests.
Add additional scope tests * Cleanup - Update error message for named expression within a comprehension within a class. Update comments. Add assert for symtable_extend_namedexpr_scope to validate that we always find at least a ModuleScope if we don't find a Class or FunctionScope * Cleanup - Add missing case for NamedStore in expr_context_name. Remove unused var in set_namedexpr_content * Refactor - Consolidate set_context and set_namedexpr_context to reduce duplicated code. Special cases for named expressions are handled by checking if ctx is NamedStore * Cleanup - Add additional use cases for ast_for_namedexpr in usage comment. Fix multiple blank lines in test_named_expressions * Tests - Remove unnecessary test case. Renumber test case function names * Remove TargetScopeError for now. Will add back if needed * Cleanup - Small comment nit for consistency * Handle positional argument check with named expression * Add TargetScopeError exception definition. Add documentation for TargetScopeError in c-api docs. Throw TargetScopeError instead of SyntaxError when using a named expression in a comprehension within a class scope * Increase stack size for parser by 200. This is a minimal change (approx. 5kb) and should not have an impact on any systems. 
Update parser test to allow 99 nested levels again * Add TargetScopeError to exception_hierarchy.txt for test_baseexception.py_ * Tests - Major update for named expression tests, both in test_named_expressions and test_parser - Add test for TargetScopeError - Add tests for named expressions in comprehension scope and edge cases - Add tests for named expressions in function arguments (declarations and call sites) - Reorganize tests to group them more logically * Cleanup - Remove unnecessary comment * Cleanup - Comment nitpicks * Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0) - Add check for LHS types to detect a parenthesis then a name (see note) - Add test for this scenario - Update tests for changed error message for named assignment to a tuple (also, see note) Note: This caused issues with the previous error handling for named assignment to a LHS that contained an expression, such as a tuple. Thus, the check for the LHS of a named expression must be changed to be more specific if we wish to maintain the previous error messages * Cleanup - Wrap lines more strictly in test file * Revert "Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0)" This reverts commit f1531400ca7d7a2d148830c8ac703f041740896d. * Add NEWS.d entry * Tests - Fix error in test_pickle.test_exceptions by adding TargetScopeError to list of exceptions * Tests - Update error message tests to reflect improved messaging convention (s/can't/cannot) * Remove cases that cannot be reached in compile.c. Small linting update. * Update Grammar/Tokens to add COLONEQUAL. Regenerate all files * Update TargetScopeError PRE_INIT and POST_INIT, as this was purposefully left out when fixing rebase conflicts * Add NamedStore back and regenerate files * Pass along line number and end col info for named expression * Simplify News entry * Fix compiler warning and explicitly mark fallthrough
2019-01-25 07:49:56 +08:00
isinstance = PyObject_IsInstance(obj, (PyObject*)NamedExpr_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty target;
expr_ty value;
if (_PyObject_LookupAttrId(obj, &PyId_target, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"target\" missing from NamedExpr");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &target, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from NamedExpr");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = NamedExpr(target, value, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)BinOp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty left;
operator_ty op;
expr_ty right;
if (_PyObject_LookupAttrId(obj, &PyId_left, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"left\" missing from BinOp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &left, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_op, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"op\" missing from BinOp");
return 1;
}
else {
int res;
res = obj2ast_operator(tmp, &op, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_right, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"right\" missing from BinOp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &right, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = BinOp(left, op, right, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)UnaryOp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
unaryop_ty op;
expr_ty operand;
if (_PyObject_LookupAttrId(obj, &PyId_op, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"op\" missing from UnaryOp");
return 1;
}
else {
int res;
res = obj2ast_unaryop(tmp, &op, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_operand, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"operand\" missing from UnaryOp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &operand, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = UnaryOp(op, operand, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Lambda_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
arguments_ty args;
expr_ty body;
if (_PyObject_LookupAttrId(obj, &PyId_args, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"args\" missing from Lambda");
return 1;
}
else {
int res;
res = obj2ast_arguments(tmp, &args, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from Lambda");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &body, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Lambda(args, body, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)IfExp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty test;
expr_ty body;
expr_ty orelse;
if (_PyObject_LookupAttrId(obj, &PyId_test, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"test\" missing from IfExp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &test, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from IfExp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &body, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_orelse, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"orelse\" missing from IfExp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &orelse, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = IfExp(test, body, orelse, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Dict_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* keys;
asdl_seq* values;
if (_PyObject_LookupAttrId(obj, &PyId_keys, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"keys\" missing from Dict");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Dict field \"keys\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
keys = _Py_asdl_seq_new(len, arena);
if (keys == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Dict field \"keys\" changed size during iteration");
goto failed;
}
asdl_seq_SET(keys, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_values, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"values\" missing from Dict");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Dict field \"values\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
values = _Py_asdl_seq_new(len, arena);
if (values == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Dict field \"values\" changed size during iteration");
goto failed;
}
asdl_seq_SET(values, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Dict(keys, values, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Set_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* elts;
if (_PyObject_LookupAttrId(obj, &PyId_elts, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"elts\" missing from Set");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Set field \"elts\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
elts = _Py_asdl_seq_new(len, arena);
if (elts == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Set field \"elts\" changed size during iteration");
goto failed;
}
asdl_seq_SET(elts, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Set(elts, lineno, col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)ListComp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty elt;
asdl_seq* generators;
if (_PyObject_LookupAttrId(obj, &PyId_elt, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"elt\" missing from ListComp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &elt, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_generators, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"generators\" missing from ListComp");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "ListComp field \"generators\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
generators = _Py_asdl_seq_new(len, arena);
if (generators == NULL) goto failed;
for (i = 0; i < len; i++) {
comprehension_ty val;
res = obj2ast_comprehension(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "ListComp field \"generators\" changed size during iteration");
goto failed;
}
asdl_seq_SET(generators, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = ListComp(elt, generators, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)SetComp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty elt;
asdl_seq* generators;
if (_PyObject_LookupAttrId(obj, &PyId_elt, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"elt\" missing from SetComp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &elt, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_generators, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"generators\" missing from SetComp");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "SetComp field \"generators\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
generators = _Py_asdl_seq_new(len, arena);
if (generators == NULL) goto failed;
for (i = 0; i < len; i++) {
comprehension_ty val;
res = obj2ast_comprehension(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "SetComp field \"generators\" changed size during iteration");
goto failed;
}
asdl_seq_SET(generators, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = SetComp(elt, generators, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)DictComp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty key;
expr_ty value;
asdl_seq* generators;
if (_PyObject_LookupAttrId(obj, &PyId_key, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"key\" missing from DictComp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &key, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from DictComp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_generators, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"generators\" missing from DictComp");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "DictComp field \"generators\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
generators = _Py_asdl_seq_new(len, arena);
if (generators == NULL) goto failed;
for (i = 0; i < len; i++) {
comprehension_ty val;
res = obj2ast_comprehension(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "DictComp field \"generators\" changed size during iteration");
goto failed;
}
asdl_seq_SET(generators, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = DictComp(key, value, generators, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)GeneratorExp_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty elt;
asdl_seq* generators;
if (_PyObject_LookupAttrId(obj, &PyId_elt, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"elt\" missing from GeneratorExp");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &elt, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_generators, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"generators\" missing from GeneratorExp");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "GeneratorExp field \"generators\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
generators = _Py_asdl_seq_new(len, arena);
if (generators == NULL) goto failed;
for (i = 0; i < len; i++) {
comprehension_ty val;
res = obj2ast_comprehension(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "GeneratorExp field \"generators\" changed size during iteration");
goto failed;
}
asdl_seq_SET(generators, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = GeneratorExp(elt, generators, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Await_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Await");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Await(value, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Yield_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
value = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Yield(value, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)YieldFrom_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from YieldFrom");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = YieldFrom(value, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Compare_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty left;
asdl_int_seq* ops;
asdl_seq* comparators;
if (_PyObject_LookupAttrId(obj, &PyId_left, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"left\" missing from Compare");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &left, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_ops, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"ops\" missing from Compare");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Compare field \"ops\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
ops = _Py_asdl_int_seq_new(len, arena);
if (ops == NULL) goto failed;
for (i = 0; i < len; i++) {
cmpop_ty val;
res = obj2ast_cmpop(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Compare field \"ops\" changed size during iteration");
goto failed;
}
asdl_seq_SET(ops, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_comparators, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"comparators\" missing from Compare");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Compare field \"comparators\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
comparators = _Py_asdl_seq_new(len, arena);
if (comparators == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Compare field \"comparators\" changed size during iteration");
goto failed;
}
asdl_seq_SET(comparators, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Compare(left, ops, comparators, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Call_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty func;
asdl_seq* args;
asdl_seq* keywords;
if (_PyObject_LookupAttrId(obj, &PyId_func, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"func\" missing from Call");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &func, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_args, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"args\" missing from Call");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Call field \"args\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
args = _Py_asdl_seq_new(len, arena);
if (args == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Call field \"args\" changed size during iteration");
goto failed;
}
asdl_seq_SET(args, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_keywords, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"keywords\" missing from Call");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Call field \"keywords\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
keywords = _Py_asdl_seq_new(len, arena);
if (keywords == NULL) goto failed;
for (i = 0; i < len; i++) {
keyword_ty val;
res = obj2ast_keyword(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Call field \"keywords\" changed size during iteration");
goto failed;
}
asdl_seq_SET(keywords, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Call(func, args, keywords, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)FormattedValue_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
int conversion;
expr_ty format_spec;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from FormattedValue");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_conversion, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
conversion = 0;
}
else {
int res;
res = obj2ast_int(tmp, &conversion, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_format_spec, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
format_spec = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &format_spec, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
*out = FormattedValue(value, conversion, format_spec, lineno,
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)JoinedStr_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* values;
if (_PyObject_LookupAttrId(obj, &PyId_values, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"values\" missing from JoinedStr");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "JoinedStr field \"values\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
values = _Py_asdl_seq_new(len, arena);
if (values == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "JoinedStr field \"values\" changed size during iteration");
goto failed;
}
asdl_seq_SET(values, i, val);
}
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = JoinedStr(values, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Constant_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
constant value;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Constant");
return 1;
}
else {
int res;
res = obj2ast_constant(tmp, &value, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Constant(value, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Attribute_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
identifier attr;
expr_context_ty ctx;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Attribute");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_attr, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"attr\" missing from Attribute");
return 1;
}
else {
int res;
res = obj2ast_identifier(tmp, &attr, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_ctx, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"ctx\" missing from Attribute");
return 1;
}
else {
int res;
res = obj2ast_expr_context(tmp, &ctx, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Attribute(value, attr, ctx, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Subscript_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
slice_ty slice;
expr_context_ty ctx;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Subscript");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_slice, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"slice\" missing from Subscript");
return 1;
}
else {
int res;
res = obj2ast_slice(tmp, &slice, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_ctx, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"ctx\" missing from Subscript");
return 1;
}
else {
int res;
res = obj2ast_expr_context(tmp, &ctx, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Subscript(value, slice, ctx, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Starred_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty value;
expr_context_ty ctx;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Starred");
return 1;
}
else {
int res;
res = obj2ast_expr(tmp, &value, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_ctx, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"ctx\" missing from Starred");
return 1;
}
else {
int res;
res = obj2ast_expr_context(tmp, &ctx, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Starred(value, ctx, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Name_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
identifier id;
expr_context_ty ctx;
if (_PyObject_LookupAttrId(obj, &PyId_id, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"id\" missing from Name");
return 1;
}
else {
int res;
res = obj2ast_identifier(tmp, &id, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_ctx, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"ctx\" missing from Name");
return 1;
}
else {
int res;
res = obj2ast_expr_context(tmp, &ctx, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Name(id, ctx, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)List_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* elts;
expr_context_ty ctx;
if (_PyObject_LookupAttrId(obj, &PyId_elts, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"elts\" missing from List");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "List field \"elts\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
elts = _Py_asdl_seq_new(len, arena);
if (elts == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "List field \"elts\" changed size during iteration");
goto failed;
}
asdl_seq_SET(elts, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_ctx, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"ctx\" missing from List");
return 1;
}
else {
int res;
res = obj2ast_expr_context(tmp, &ctx, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
2009-12-13 09:24:58 +08:00
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = List(elts, ctx, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Tuple_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
asdl_seq* elts;
expr_context_ty ctx;
if (_PyObject_LookupAttrId(obj, &PyId_elts, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"elts\" missing from Tuple");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "Tuple field \"elts\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
elts = _Py_asdl_seq_new(len, arena);
if (elts == NULL) goto failed;
for (i = 0; i < len; i++) {
expr_ty val;
res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "Tuple field \"elts\" changed size during iteration");
goto failed;
}
asdl_seq_SET(elts, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_ctx, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"ctx\" missing from Tuple");
return 1;
}
else {
int res;
res = obj2ast_expr_context(tmp, &ctx, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = Tuple(elts, ctx, lineno, col_offset, end_lineno, end_col_offset,
arena);
if (*out == NULL) goto failed;
return 0;
}
PyErr_Format(PyExc_TypeError, "expected some sort of expr, but got %R", obj);
failed:
Py_XDECREF(tmp);
return 1;
}
/*
 * Map a Python-level expr_context object onto its expr_context_ty value.
 *
 * obj is tested against the expr_context classes in the order Load, Store,
 * Del, AugLoad, AugStore, Param; the first class it is an instance of
 * decides the value stored in *out.
 *
 * Returns 0 on success.  Returns 1 when PyObject_IsInstance reports an
 * error (-1), or when obj matches none of the classes, in which case a
 * TypeError is set here.  The arena argument is not used.
 */
int
obj2ast_expr_context(PyObject* obj, expr_context_ty* out, PyArena* arena)
{
    const struct {
        PyObject *cls;
        expr_context_ty value;
    } table[] = {
        {(PyObject *)Load_type, Load},
        {(PyObject *)Store_type, Store},
        {(PyObject *)Del_type, Del},
        {(PyObject *)AugLoad_type, AugLoad},
        {(PyObject *)AugStore_type, AugStore},
        {(PyObject *)Param_type, Param},
    };
    size_t k;

    for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
        int matched = PyObject_IsInstance(obj, table[k].cls);
        if (matched == -1) {
            return 1;
        }
        if (matched) {
            *out = table[k].value;
            return 0;
        }
    }

    PyErr_Format(PyExc_TypeError, "expected some sort of expr_context, but got %R", obj);
    return 1;
}
/*
 * Convert a Python-level slice node into a slice_ty.
 *
 * Py_None converts to a NULL slice.  Otherwise obj is checked, in order,
 * against Slice, ExtSlice and Index:
 *   - Slice:    optional fields "lower", "upper", "step" (a missing
 *               attribute or None becomes NULL).
 *   - ExtSlice: required field "dims", which must be a list; every element
 *               is converted recursively.
 *   - Index:    required field "value".
 *
 * Returns 0 on success with the result in *out, 1 on failure.  A missing
 * required field, a non-list "dims", or an object of none of the three
 * classes sets TypeError; a "dims" list whose length changes while it is
 * being converted sets RuntimeError.
 */
int
obj2ast_slice(PyObject* obj, slice_ty* out, PyArena* arena)
{
    int isinstance;

    PyObject *tmp = NULL;

    if (obj == Py_None) {
        *out = NULL;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)Slice_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        expr_ty lower;
        expr_ty upper;
        expr_ty step;

        if (_PyObject_LookupAttrId(obj, &PyId_lower, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL || tmp == Py_None) {
            Py_CLEAR(tmp);
            lower = NULL;
        }
        else {
            int res;
            res = obj2ast_expr(tmp, &lower, arena);
            if (res != 0) goto failed;
            Py_CLEAR(tmp);
        }
        if (_PyObject_LookupAttrId(obj, &PyId_upper, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL || tmp == Py_None) {
            Py_CLEAR(tmp);
            upper = NULL;
        }
        else {
            int res;
            res = obj2ast_expr(tmp, &upper, arena);
            if (res != 0) goto failed;
            Py_CLEAR(tmp);
        }
        if (_PyObject_LookupAttrId(obj, &PyId_step, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL || tmp == Py_None) {
            Py_CLEAR(tmp);
            step = NULL;
        }
        else {
            int res;
            res = obj2ast_expr(tmp, &step, arena);
            if (res != 0) goto failed;
            Py_CLEAR(tmp);
        }
        *out = Slice(lower, upper, step, arena);
        if (*out == NULL) goto failed;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)ExtSlice_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        asdl_seq* dims;

        if (_PyObject_LookupAttrId(obj, &PyId_dims, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"dims\" missing from ExtSlice");
            return 1;
        }
        else {
            int res;
            Py_ssize_t len;
            Py_ssize_t i;
            if (!PyList_Check(tmp)) {
                PyErr_Format(PyExc_TypeError, "ExtSlice field \"dims\" must be a list, not a %.200s", tmp->ob_type->tp_name);
                goto failed;
            }
            len = PyList_GET_SIZE(tmp);
            dims = _Py_asdl_seq_new(len, arena);
            if (dims == NULL) goto failed;
            for (i = 0; i < len; i++) {
                slice_ty val;
                /* PyList_GET_ITEM yields a borrowed reference.  The
                   recursive conversion performs attribute lookups, which
                   can run Python code that replaces this list slot without
                   changing the list's length; keep the item alive for the
                   duration of the call. */
                PyObject *item = PyList_GET_ITEM(tmp, i);
                Py_INCREF(item);
                res = obj2ast_slice(item, &val, arena);
                Py_DECREF(item);
                if (res != 0) goto failed;
                if (len != PyList_GET_SIZE(tmp)) {
                    PyErr_SetString(PyExc_RuntimeError, "ExtSlice field \"dims\" changed size during iteration");
                    goto failed;
                }
                asdl_seq_SET(dims, i, val);
            }
            Py_CLEAR(tmp);
        }
        *out = ExtSlice(dims, arena);
        if (*out == NULL) goto failed;
        return 0;
    }
    isinstance = PyObject_IsInstance(obj, (PyObject*)Index_type);
    if (isinstance == -1) {
        return 1;
    }
    if (isinstance) {
        expr_ty value;

        if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
            return 1;
        }
        if (tmp == NULL) {
            PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Index");
            return 1;
        }
        else {
            int res;
            res = obj2ast_expr(tmp, &value, arena);
            if (res != 0) goto failed;
            Py_CLEAR(tmp);
        }
        *out = Index(value, arena);
        if (*out == NULL) goto failed;
        return 0;
    }

    PyErr_Format(PyExc_TypeError, "expected some sort of slice, but got %R", obj);
    failed:
    /* tmp still owns a reference when a conversion fails part-way. */
    Py_XDECREF(tmp);
    return 1;
}
/*
 * Map a Python-level boolop object onto its boolop_ty value.
 *
 * obj is tested against And, then Or; the first class it is an instance
 * of decides the value stored in *out.
 *
 * Returns 0 on success.  Returns 1 when PyObject_IsInstance reports an
 * error (-1), or when obj is neither class, in which case a TypeError is
 * set here.  The arena argument is not used.
 */
int
obj2ast_boolop(PyObject* obj, boolop_ty* out, PyArena* arena)
{
    const struct {
        PyObject *cls;
        boolop_ty value;
    } table[] = {
        {(PyObject *)And_type, And},
        {(PyObject *)Or_type, Or},
    };
    size_t k;

    for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
        int matched = PyObject_IsInstance(obj, table[k].cls);
        if (matched == -1) {
            return 1;
        }
        if (matched) {
            *out = table[k].value;
            return 0;
        }
    }

    PyErr_Format(PyExc_TypeError, "expected some sort of boolop, but got %R", obj);
    return 1;
}
/*
 * Map a Python-level binary operator object onto its operator_ty value.
 *
 * obj is tested against the operator classes in the order Add, Sub, Mult,
 * MatMult, Div, Mod, Pow, LShift, RShift, BitOr, BitXor, BitAnd, FloorDiv;
 * the first class it is an instance of decides the value stored in *out.
 *
 * Returns 0 on success.  Returns 1 when PyObject_IsInstance reports an
 * error (-1), or when obj matches none of the classes, in which case a
 * TypeError is set here.  The arena argument is not used.
 */
int
obj2ast_operator(PyObject* obj, operator_ty* out, PyArena* arena)
{
    const struct {
        PyObject *cls;
        operator_ty value;
    } table[] = {
        {(PyObject *)Add_type, Add},
        {(PyObject *)Sub_type, Sub},
        {(PyObject *)Mult_type, Mult},
        {(PyObject *)MatMult_type, MatMult},
        {(PyObject *)Div_type, Div},
        {(PyObject *)Mod_type, Mod},
        {(PyObject *)Pow_type, Pow},
        {(PyObject *)LShift_type, LShift},
        {(PyObject *)RShift_type, RShift},
        {(PyObject *)BitOr_type, BitOr},
        {(PyObject *)BitXor_type, BitXor},
        {(PyObject *)BitAnd_type, BitAnd},
        {(PyObject *)FloorDiv_type, FloorDiv},
    };
    size_t k;

    for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
        int matched = PyObject_IsInstance(obj, table[k].cls);
        if (matched == -1) {
            return 1;
        }
        if (matched) {
            *out = table[k].value;
            return 0;
        }
    }

    PyErr_Format(PyExc_TypeError, "expected some sort of operator, but got %R", obj);
    return 1;
}
/*
 * Map a Python-level unary operator object onto its unaryop_ty value.
 *
 * obj is tested against Invert, Not, UAdd and USub in that order; the
 * first class it is an instance of decides the value stored in *out.
 *
 * Returns 0 on success.  Returns 1 when PyObject_IsInstance reports an
 * error (-1), or when obj matches none of the classes, in which case a
 * TypeError is set here.  The arena argument is not used.
 */
int
obj2ast_unaryop(PyObject* obj, unaryop_ty* out, PyArena* arena)
{
    const struct {
        PyObject *cls;
        unaryop_ty value;
    } table[] = {
        {(PyObject *)Invert_type, Invert},
        {(PyObject *)Not_type, Not},
        {(PyObject *)UAdd_type, UAdd},
        {(PyObject *)USub_type, USub},
    };
    size_t k;

    for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
        int matched = PyObject_IsInstance(obj, table[k].cls);
        if (matched == -1) {
            return 1;
        }
        if (matched) {
            *out = table[k].value;
            return 0;
        }
    }

    PyErr_Format(PyExc_TypeError, "expected some sort of unaryop, but got %R", obj);
    return 1;
}
/*
 * Map a Python-level comparison operator object onto its cmpop_ty value.
 *
 * obj is tested against the comparison classes in the order Eq, NotEq, Lt,
 * LtE, Gt, GtE, Is, IsNot, In, NotIn; the first class it is an instance of
 * decides the value stored in *out.
 *
 * Returns 0 on success.  Returns 1 when PyObject_IsInstance reports an
 * error (-1), or when obj matches none of the classes, in which case a
 * TypeError is set here.  The arena argument is not used.
 */
int
obj2ast_cmpop(PyObject* obj, cmpop_ty* out, PyArena* arena)
{
    const struct {
        PyObject *cls;
        cmpop_ty value;
    } table[] = {
        {(PyObject *)Eq_type, Eq},
        {(PyObject *)NotEq_type, NotEq},
        {(PyObject *)Lt_type, Lt},
        {(PyObject *)LtE_type, LtE},
        {(PyObject *)Gt_type, Gt},
        {(PyObject *)GtE_type, GtE},
        {(PyObject *)Is_type, Is},
        {(PyObject *)IsNot_type, IsNot},
        {(PyObject *)In_type, In},
        {(PyObject *)NotIn_type, NotIn},
    };
    size_t k;

    for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
        int matched = PyObject_IsInstance(obj, table[k].cls);
        if (matched == -1) {
            return 1;
        }
        if (matched) {
            *out = table[k].value;
            return 0;
        }
    }

    PyErr_Format(PyExc_TypeError, "expected some sort of cmpop, but got %R", obj);
    return 1;
}
/*
 * Convert a Python-level comprehension object into a comprehension_ty.
 *
 * Four attributes are read from obj, all required:
 *   - "target" and "iter": converted with obj2ast_expr.
 *   - "ifs": must be a list; every element is converted with obj2ast_expr.
 *   - "is_async": converted with obj2ast_int.
 *
 * Returns 0 on success with the result in *out, 1 on failure.  A missing
 * field or a non-list "ifs" sets TypeError; an "ifs" list whose length
 * changes while it is being converted sets RuntimeError.
 */
int
obj2ast_comprehension(PyObject* obj, comprehension_ty* out, PyArena* arena)
{
    PyObject* tmp = NULL;
    expr_ty target;
    expr_ty iter;
    asdl_seq* ifs;
    int is_async;

    if (_PyObject_LookupAttrId(obj, &PyId_target, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"target\" missing from comprehension");
        return 1;
    }
    else {
        int res;
        res = obj2ast_expr(tmp, &target, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }
    if (_PyObject_LookupAttrId(obj, &PyId_iter, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"iter\" missing from comprehension");
        return 1;
    }
    else {
        int res;
        res = obj2ast_expr(tmp, &iter, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }
    if (_PyObject_LookupAttrId(obj, &PyId_ifs, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"ifs\" missing from comprehension");
        return 1;
    }
    else {
        int res;
        Py_ssize_t len;
        Py_ssize_t i;
        if (!PyList_Check(tmp)) {
            PyErr_Format(PyExc_TypeError, "comprehension field \"ifs\" must be a list, not a %.200s", tmp->ob_type->tp_name);
            goto failed;
        }
        len = PyList_GET_SIZE(tmp);
        ifs = _Py_asdl_seq_new(len, arena);
        if (ifs == NULL) goto failed;
        for (i = 0; i < len; i++) {
            expr_ty val;
            /* PyList_GET_ITEM yields a borrowed reference.  Converting the
               item may run Python code that replaces this list slot without
               changing the list's length; keep the item alive for the
               duration of the call. */
            PyObject *item = PyList_GET_ITEM(tmp, i);
            Py_INCREF(item);
            res = obj2ast_expr(item, &val, arena);
            Py_DECREF(item);
            if (res != 0) goto failed;
            if (len != PyList_GET_SIZE(tmp)) {
                PyErr_SetString(PyExc_RuntimeError, "comprehension field \"ifs\" changed size during iteration");
                goto failed;
            }
            asdl_seq_SET(ifs, i, val);
        }
        Py_CLEAR(tmp);
    }
    if (_PyObject_LookupAttrId(obj, &PyId_is_async, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"is_async\" missing from comprehension");
        return 1;
    }
    else {
        int res;
        res = obj2ast_int(tmp, &is_async, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }
    *out = comprehension(target, iter, ifs, is_async, arena);
    /* Treat a NULL result from the constructor as a failure, as the other
       obj2ast_* converters in this file do, instead of reporting success
       with *out == NULL. */
    if (*out == NULL) goto failed;
    return 0;
failed:
    /* tmp still owns a reference when a conversion fails part-way; it is
       NULL (and this is a no-op) when only the constructor failed. */
    Py_XDECREF(tmp);
    return 1;
}
int
obj2ast_excepthandler(PyObject* obj, excepthandler_ty* out, PyArena* arena)
{
int isinstance;
PyObject *tmp = NULL;
int lineno;
int col_offset;
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
int end_lineno;
int end_col_offset;
if (obj == Py_None) {
*out = NULL;
return 0;
}
if (_PyObject_LookupAttrId(obj, &PyId_lineno, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"lineno\" missing from excepthandler");
return 1;
}
else {
int res;
res = obj2ast_int(tmp, &lineno, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_col_offset, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"col_offset\" missing from excepthandler");
return 1;
}
else {
int res;
res = obj2ast_int(tmp, &col_offset, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
if (_PyObject_LookupAttrId(obj, &PyId_end_lineno, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
end_lineno = 0;
}
else {
int res;
res = obj2ast_int(tmp, &end_lineno, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_end_col_offset, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
end_col_offset = 0;
}
else {
int res;
res = obj2ast_int(tmp, &end_col_offset, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
isinstance = PyObject_IsInstance(obj, (PyObject*)ExceptHandler_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
expr_ty type;
identifier name;
asdl_seq* body;
if (_PyObject_LookupAttrId(obj, &PyId_type, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
type = NULL;
}
else {
int res;
res = obj2ast_expr(tmp, &type, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_name, &tmp) < 0) {
return 1;
}
if (tmp == NULL || tmp == Py_None) {
Py_CLEAR(tmp);
name = NULL;
}
else {
int res;
res = obj2ast_identifier(tmp, &name, arena);
if (res != 0) goto failed;
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
if (_PyObject_LookupAttrId(obj, &PyId_body, &tmp) < 0) {
return 1;
}
if (tmp == NULL) {
PyErr_SetString(PyExc_TypeError, "required field \"body\" missing from ExceptHandler");
return 1;
}
else {
int res;
Py_ssize_t len;
Py_ssize_t i;
if (!PyList_Check(tmp)) {
PyErr_Format(PyExc_TypeError, "ExceptHandler field \"body\" must be a list, not a %.200s", tmp->ob_type->tp_name);
goto failed;
}
len = PyList_GET_SIZE(tmp);
body = _Py_asdl_seq_new(len, arena);
if (body == NULL) goto failed;
for (i = 0; i < len; i++) {
stmt_ty val;
res = obj2ast_stmt(PyList_GET_ITEM(tmp, i), &val, arena);
if (res != 0) goto failed;
if (len != PyList_GET_SIZE(tmp)) {
PyErr_SetString(PyExc_RuntimeError, "ExceptHandler field \"body\" changed size during iteration");
goto failed;
}
asdl_seq_SET(body, i, val);
}
2013-07-27 06:03:47 +08:00
Py_CLEAR(tmp);
}
bpo-33416: Add end positions to Python AST (GH-11605) The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points: * It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`. * I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient. * Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear. * For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in ```python class C: pass pass ``` the end line and end column for the class definition is (2, 8). * For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node. * I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing. An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 19:18:22 +08:00
*out = ExceptHandler(type, name, body, lineno, col_offset, end_lineno,
end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
PyErr_Format(PyExc_TypeError, "expected some sort of excepthandler, but got %R", obj);
failed:
Py_XDECREF(tmp);
return 1;
}
/* Convert a Python-level "arguments" object into an arguments_ty node.
 *
 * Reads the attributes args, vararg, kwonlyargs, kw_defaults, kwarg and
 * defaults from obj.  vararg and kwarg may be absent or None (stored as
 * NULL); the four sequence fields are required and must be lists.
 *
 * Returns 0 and stores the node in *out on success; returns 1 with an
 * exception set on failure.  Sequences and nodes are allocated in arena.
 */
int
obj2ast_arguments(PyObject* obj, arguments_ty* out, PyArena* arena)
{
    PyObject* tmp = NULL;
    asdl_seq* args;
    arg_ty vararg;
    asdl_seq* kwonlyargs;
    asdl_seq* kw_defaults;
    arg_ty kwarg;
    asdl_seq* defaults;

    /* args: required list of arg nodes. */
    if (_PyObject_LookupAttrId(obj, &PyId_args, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"args\" missing from arguments");
        return 1;
    }
    else {
        int res;
        Py_ssize_t len;
        Py_ssize_t i;
        if (!PyList_Check(tmp)) {
            PyErr_Format(PyExc_TypeError, "arguments field \"args\" must be a list, not a %.200s", tmp->ob_type->tp_name);
            goto failed;
        }
        len = PyList_GET_SIZE(tmp);
        args = _Py_asdl_seq_new(len, arena);
        if (args == NULL) goto failed;
        for (i = 0; i < len; i++) {
            arg_ty val;
            /* PyList_GET_ITEM returns a borrowed reference.  The list can
               be mutated while the item is being converted (see the size
               check below), so keep the item alive for the call. */
            PyObject *item = PyList_GET_ITEM(tmp, i);
            Py_INCREF(item);
            res = obj2ast_arg(item, &val, arena);
            Py_DECREF(item);
            if (res != 0) goto failed;
            if (len != PyList_GET_SIZE(tmp)) {
                PyErr_SetString(PyExc_RuntimeError, "arguments field \"args\" changed size during iteration");
                goto failed;
            }
            asdl_seq_SET(args, i, val);
        }
        Py_CLEAR(tmp);
    }

    /* vararg: optional arg node; missing or None becomes NULL. */
    if (_PyObject_LookupAttrId(obj, &PyId_vararg, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        vararg = NULL;
    }
    else {
        int res;
        res = obj2ast_arg(tmp, &vararg, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    /* kwonlyargs: required list of arg nodes. */
    if (_PyObject_LookupAttrId(obj, &PyId_kwonlyargs, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"kwonlyargs\" missing from arguments");
        return 1;
    }
    else {
        int res;
        Py_ssize_t len;
        Py_ssize_t i;
        if (!PyList_Check(tmp)) {
            PyErr_Format(PyExc_TypeError, "arguments field \"kwonlyargs\" must be a list, not a %.200s", tmp->ob_type->tp_name);
            goto failed;
        }
        len = PyList_GET_SIZE(tmp);
        kwonlyargs = _Py_asdl_seq_new(len, arena);
        if (kwonlyargs == NULL) goto failed;
        for (i = 0; i < len; i++) {
            arg_ty val;
            /* Hold a strong reference across the conversion (see "args"). */
            PyObject *item = PyList_GET_ITEM(tmp, i);
            Py_INCREF(item);
            res = obj2ast_arg(item, &val, arena);
            Py_DECREF(item);
            if (res != 0) goto failed;
            if (len != PyList_GET_SIZE(tmp)) {
                PyErr_SetString(PyExc_RuntimeError, "arguments field \"kwonlyargs\" changed size during iteration");
                goto failed;
            }
            asdl_seq_SET(kwonlyargs, i, val);
        }
        Py_CLEAR(tmp);
    }

    /* kw_defaults: required list of expr nodes. */
    if (_PyObject_LookupAttrId(obj, &PyId_kw_defaults, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"kw_defaults\" missing from arguments");
        return 1;
    }
    else {
        int res;
        Py_ssize_t len;
        Py_ssize_t i;
        if (!PyList_Check(tmp)) {
            PyErr_Format(PyExc_TypeError, "arguments field \"kw_defaults\" must be a list, not a %.200s", tmp->ob_type->tp_name);
            goto failed;
        }
        len = PyList_GET_SIZE(tmp);
        kw_defaults = _Py_asdl_seq_new(len, arena);
        if (kw_defaults == NULL) goto failed;
        for (i = 0; i < len; i++) {
            expr_ty val;
            /* Hold a strong reference across the conversion (see "args"). */
            PyObject *item = PyList_GET_ITEM(tmp, i);
            Py_INCREF(item);
            res = obj2ast_expr(item, &val, arena);
            Py_DECREF(item);
            if (res != 0) goto failed;
            if (len != PyList_GET_SIZE(tmp)) {
                PyErr_SetString(PyExc_RuntimeError, "arguments field \"kw_defaults\" changed size during iteration");
                goto failed;
            }
            asdl_seq_SET(kw_defaults, i, val);
        }
        Py_CLEAR(tmp);
    }

    /* kwarg: optional arg node; missing or None becomes NULL. */
    if (_PyObject_LookupAttrId(obj, &PyId_kwarg, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        kwarg = NULL;
    }
    else {
        int res;
        res = obj2ast_arg(tmp, &kwarg, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    /* defaults: required list of expr nodes. */
    if (_PyObject_LookupAttrId(obj, &PyId_defaults, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"defaults\" missing from arguments");
        return 1;
    }
    else {
        int res;
        Py_ssize_t len;
        Py_ssize_t i;
        if (!PyList_Check(tmp)) {
            PyErr_Format(PyExc_TypeError, "arguments field \"defaults\" must be a list, not a %.200s", tmp->ob_type->tp_name);
            goto failed;
        }
        len = PyList_GET_SIZE(tmp);
        defaults = _Py_asdl_seq_new(len, arena);
        if (defaults == NULL) goto failed;
        for (i = 0; i < len; i++) {
            expr_ty val;
            /* Hold a strong reference across the conversion (see "args"). */
            PyObject *item = PyList_GET_ITEM(tmp, i);
            Py_INCREF(item);
            res = obj2ast_expr(item, &val, arena);
            Py_DECREF(item);
            if (res != 0) goto failed;
            if (len != PyList_GET_SIZE(tmp)) {
                PyErr_SetString(PyExc_RuntimeError, "arguments field \"defaults\" changed size during iteration");
                goto failed;
            }
            asdl_seq_SET(defaults, i, val);
        }
        Py_CLEAR(tmp);
    }

    *out = arguments(args, vararg, kwonlyargs, kw_defaults, kwarg, defaults,
                     arena);
    /* Other converters in this file treat a NULL constructor result as a
       failure; do the same here instead of reporting success. */
    if (*out == NULL) goto failed;
    return 0;
failed:
    Py_XDECREF(tmp);
    return 1;
}
/* Convert a Python-level "arg" object into an arg_ty node.
 *
 * Reads arg (required), annotation and type_comment (optional; missing or
 * None becomes NULL), lineno and col_offset (required), and end_lineno and
 * end_col_offset (optional; missing or None becomes 0).
 *
 * Returns 0 and stores the node in *out on success; returns 1 with an
 * exception set on failure.
 */
int
obj2ast_arg(PyObject* obj, arg_ty* out, PyArena* arena)
{
    PyObject* tmp = NULL;
    identifier arg;
    expr_ty annotation;
    string type_comment;
    int lineno;
    int col_offset;
    int end_lineno;
    int end_col_offset;

    if (_PyObject_LookupAttrId(obj, &PyId_arg, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"arg\" missing from arg");
        return 1;
    }
    else {
        int res;
        res = obj2ast_identifier(tmp, &arg, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_annotation, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        annotation = NULL;
    }
    else {
        int res;
        res = obj2ast_expr(tmp, &annotation, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_type_comment, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        type_comment = NULL;
    }
    else {
        int res;
        res = obj2ast_string(tmp, &type_comment, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_lineno, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"lineno\" missing from arg");
        return 1;
    }
    else {
        int res;
        res = obj2ast_int(tmp, &lineno, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_col_offset, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"col_offset\" missing from arg");
        return 1;
    }
    else {
        int res;
        res = obj2ast_int(tmp, &col_offset, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    /* End positions are optional; a missing or None value is stored as 0. */
    if (_PyObject_LookupAttrId(obj, &PyId_end_lineno, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        end_lineno = 0;
    }
    else {
        int res;
        res = obj2ast_int(tmp, &end_lineno, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_end_col_offset, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        end_col_offset = 0;
    }
    else {
        int res;
        res = obj2ast_int(tmp, &end_col_offset, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    *out = arg(arg, annotation, type_comment, lineno, col_offset, end_lineno,
               end_col_offset, arena);
    /* Other converters in this file treat a NULL constructor result as a
       failure; do the same here instead of reporting success. */
    if (*out == NULL) goto failed;
    return 0;
failed:
    Py_XDECREF(tmp);
    return 1;
}
/* Convert a Python-level "keyword" object into a keyword_ty node.
 *
 * Reads arg (optional; missing or None becomes NULL) and value (required).
 *
 * Returns 0 and stores the node in *out on success; returns 1 with an
 * exception set on failure.
 */
int
obj2ast_keyword(PyObject* obj, keyword_ty* out, PyArena* arena)
{
    PyObject* tmp = NULL;
    identifier arg;
    expr_ty value;

    if (_PyObject_LookupAttrId(obj, &PyId_arg, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        arg = NULL;
    }
    else {
        int res;
        res = obj2ast_identifier(tmp, &arg, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from keyword");
        return 1;
    }
    else {
        int res;
        res = obj2ast_expr(tmp, &value, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    *out = keyword(arg, value, arena);
    /* Other converters in this file treat a NULL constructor result as a
       failure; do the same here instead of reporting success. */
    if (*out == NULL) goto failed;
    return 0;
failed:
    Py_XDECREF(tmp);
    return 1;
}
/* Convert a Python-level "alias" object into an alias_ty node.
 *
 * Reads name (required) and asname (optional; missing or None becomes NULL).
 *
 * Returns 0 and stores the node in *out on success; returns 1 with an
 * exception set on failure.
 */
int
obj2ast_alias(PyObject* obj, alias_ty* out, PyArena* arena)
{
    PyObject* tmp = NULL;
    identifier name;
    identifier asname;

    if (_PyObject_LookupAttrId(obj, &PyId_name, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"name\" missing from alias");
        return 1;
    }
    else {
        int res;
        res = obj2ast_identifier(tmp, &name, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_asname, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        asname = NULL;
    }
    else {
        int res;
        res = obj2ast_identifier(tmp, &asname, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    *out = alias(name, asname, arena);
    /* Other converters in this file treat a NULL constructor result as a
       failure; do the same here instead of reporting success. */
    if (*out == NULL) goto failed;
    return 0;
failed:
    Py_XDECREF(tmp);
    return 1;
}
/* Convert a Python-level "withitem" object into a withitem_ty node.
 *
 * Reads context_expr (required) and optional_vars (optional; missing or
 * None becomes NULL).
 *
 * Returns 0 and stores the node in *out on success; returns 1 with an
 * exception set on failure.
 */
int
obj2ast_withitem(PyObject* obj, withitem_ty* out, PyArena* arena)
{
    PyObject* tmp = NULL;
    expr_ty context_expr;
    expr_ty optional_vars;

    if (_PyObject_LookupAttrId(obj, &PyId_context_expr, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"context_expr\" missing from withitem");
        return 1;
    }
    else {
        int res;
        res = obj2ast_expr(tmp, &context_expr, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    if (_PyObject_LookupAttrId(obj, &PyId_optional_vars, &tmp) < 0) {
        return 1;
    }
    if (tmp == NULL || tmp == Py_None) {
        Py_CLEAR(tmp);
        optional_vars = NULL;
    }
    else {
        int res;
        res = obj2ast_expr(tmp, &optional_vars, arena);
        if (res != 0) goto failed;
        Py_CLEAR(tmp);
    }

    *out = withitem(context_expr, optional_vars, arena);
    /* Other converters in this file treat a NULL constructor result as a
       failure; do the same here instead of reporting success. */
    if (*out == NULL) goto failed;
    return 0;
failed:
    Py_XDECREF(tmp);
    return 1;
}
/* Convert a Python-level "type_ignore" object into a type_ignore_ty node.
 *
 * None is accepted and stored as NULL.  Otherwise obj must be an instance
 * of TypeIgnore carrying a "lineno" attribute.
 *
 * Returns 0 on success; returns 1 with an exception set on failure.
 */
int
obj2ast_type_ignore(PyObject* obj, type_ignore_ty* out, PyArena* arena)
{
    PyObject *field = NULL;
    int is_type_ignore;
    int lineno;

    if (obj == Py_None) {
        *out = NULL;
        return 0;
    }

    is_type_ignore = PyObject_IsInstance(obj, (PyObject*)TypeIgnore_type);
    if (is_type_ignore == -1) {
        return 1;
    }
    if (!is_type_ignore) {
        PyErr_Format(PyExc_TypeError, "expected some sort of type_ignore, but got %R", obj);
        return 1;
    }

    /* lineno is the only field of TypeIgnore read here, and it is required. */
    if (_PyObject_LookupAttrId(obj, &PyId_lineno, &field) < 0) {
        return 1;
    }
    if (field == NULL) {
        PyErr_SetString(PyExc_TypeError, "required field \"lineno\" missing from TypeIgnore");
        return 1;
    }
    if (obj2ast_int(field, &lineno, arena) != 0) {
        Py_DECREF(field);
        return 1;
    }
    Py_DECREF(field);

    *out = TypeIgnore(lineno, arena);
    return (*out == NULL) ? 1 : 0;
}
static struct PyModuleDef _astmodule = {
PyModuleDef_HEAD_INIT, "_ast"
};
PyMODINIT_FUNC
PyInit__ast(void)
{
PyObject *m, *d;
if (!init_types()) return NULL;
m = PyModule_Create(&_astmodule);
if (!m) return NULL;
d = PyModule_GetDict(m);
if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;
if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)
return NULL;
if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)
return NULL;
if (PyDict_SetItemString(d, "mod", (PyObject*)mod_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Module", (PyObject*)Module_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Interactive", (PyObject*)Interactive_type) <
0) return NULL;
if (PyDict_SetItemString(d, "Expression", (PyObject*)Expression_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "FunctionType", (PyObject*)FunctionType_type) <
0) return NULL;
if (PyDict_SetItemString(d, "Suite", (PyObject*)Suite_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "stmt", (PyObject*)stmt_type) < 0) return NULL;
if (PyDict_SetItemString(d, "FunctionDef", (PyObject*)FunctionDef_type) <
0) return NULL;
if (PyDict_SetItemString(d, "AsyncFunctionDef",
(PyObject*)AsyncFunctionDef_type) < 0) return NULL;
if (PyDict_SetItemString(d, "ClassDef", (PyObject*)ClassDef_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Return", (PyObject*)Return_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Delete", (PyObject*)Delete_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Assign", (PyObject*)Assign_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "AugAssign", (PyObject*)AugAssign_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "AnnAssign", (PyObject*)AnnAssign_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "For", (PyObject*)For_type) < 0) return NULL;
if (PyDict_SetItemString(d, "AsyncFor", (PyObject*)AsyncFor_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "While", (PyObject*)While_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "If", (PyObject*)If_type) < 0) return NULL;
if (PyDict_SetItemString(d, "With", (PyObject*)With_type) < 0) return NULL;
if (PyDict_SetItemString(d, "AsyncWith", (PyObject*)AsyncWith_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Raise", (PyObject*)Raise_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Try", (PyObject*)Try_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Assert", (PyObject*)Assert_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Import", (PyObject*)Import_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "ImportFrom", (PyObject*)ImportFrom_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Global", (PyObject*)Global_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Nonlocal", (PyObject*)Nonlocal_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Expr", (PyObject*)Expr_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Pass", (PyObject*)Pass_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Break", (PyObject*)Break_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Continue", (PyObject*)Continue_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "expr", (PyObject*)expr_type) < 0) return NULL;
if (PyDict_SetItemString(d, "BoolOp", (PyObject*)BoolOp_type) < 0) return
NULL;
bpo-35224: PEP 572 Implementation (#10497) * Add tokenization of := - Add token to Include/token.h. Add token to documentation in Doc/library/token.rst. - Run `./python Lib/token.py` to regenerate Lib/token.py. - Update Parser/tokenizer.c: add case to handle `:=`. * Add initial usage of := in grammar. * Update Python.asdl to match the grammar updates. Regenerated Include/Python-ast.h and Python/Python-ast.c * Update AST and compiler files in Python/ast.c and Python/compile.c. Basic functionality, this isn't scoped properly * Regenerate Lib/symbol.py using `./python Lib/symbol.py` * Tests - Fix failing tests in test_parser.py due to changes in token numbers for internal representation * Tests - Add simple test for := token * Tests - Add simple tests for named expressions using expr and suite * Tests - Update number of levels for nested expressions to prevent stack overflow * Update symbol table to handle NamedExpr * Update Grammar to allow assignment expressions in if statements. Regenerate Python/graminit.c accordingly using `make regen-grammar` * Tests - Add additional tests for named expressions in RoundtripLegalSyntaxTestCase, based on examples and information directly from PEP 572 Note: failing tests are currently commented out (4 out of 24 tests currently fail) * Tests - Add temporary syntax test failure tests in test_parser.py Note: There is an outstanding TODO for this -- syntax tests need to be moved to a different file (presumably test_syntax.py), but this is covering what needs to be tested at the moment, and it's more convenient to run a single test for the time being * Add support for allowing assignment expressions as function argument annotations. Uncomment tests for these cases because they all pass now! * Tests - Move existing syntax tests out of test_parser.py and into test_named_expressions.py. 
Refactor syntax tests to use unittest * Add TargetScopeError exception to extend SyntaxError Note: This simply creates the TargetScopeError exception, it is not yet used anywhere * Tests - Update tests per PEP 572 Continue refactoring test suite: The named expression test suite now checks for any invalid cases that throw exceptions (no longer limited to SyntaxErrors), assignment tests to ensure that variables are properly assigned, and scope tests to ensure that variable availability and values are correct Note: - There are still tests that are marked to skip, as they are not yet implemented - There are approximately 300 lines of the PEP that have not yet been addressed, though these may be deferred * Documentation - Small updates to XXX/todo comments - Remove XXX from child description in ast.c - Add comment with number of previously supported nested expressions for 3.7.X in test_parser.py * Fix assert in seq_for_testlist() * Cleanup - Denote "Not implemented -- No keyword args" on failing test case. Fix PEP8 error for blank lines at beginning of test classes in test_parser.py * Tests - Wrap all file opens in `with...as` to ensure files are closed * WIP: handle f(a := 1) * Tests and Cleanup - No longer skips keyword arg test. Keyword arg test now uses a simpler test case and does not rely on an external file. Remove print statements from ast.c * Tests - Refactor last remaining test case that relied on on external file to use a simpler test case without the dependency * Tests - Add better description of remaning skipped tests. Add test checking scope when using assignment expression in a function argument * Tests - Add test for nested comprehension, testing value and scope. Fix variable name in skipped comprehension scope test * Handle restriction of LHS for named expressions - can only assign to LHS of type NAME. Specifically, restrict assignment to tuples This adds an alternative set_context specifically for named expressions, set_namedexpr_context. 
Thus, context is now set differently for standard assignment versus assignment for named expressions in order to handle restrictions. * Tests - Update negative test case for assigning to lambda to match new error message. Add negative test case for assigning to tuple * Tests - Reorder test cases to group invalid syntax cases and named assignment target errors * Tests - Update test case for named expression in function argument - check that result and variable are set correctly * Todo - Add todo for TargetScopeError based on Guido's comment (https://github.com/python/cpython/commit/2b3acd37bdfc2d35e5094228c6684050d2aa8b0a#r30472562) * Tests - Add named expression tests for assignment operator in function arguments Note: One of two tests are skipped, as function arguments are currently treating an assignment expression inside of parenthesis as one child, which does not properly catch the named expression, nor does it count arguments properly * Add NamedStore to expr_context. Regenerate related code with `make regen-ast` * Add usage of NamedStore to ast_for_named_expr in ast.c. Update occurances of checking for Store to also handle NamedStore where appropriate * Add ste_comprehension to _symtable_entry to track if the namespace is a comprehension. Initialize ste_comprehension to 0. Set set_comprehension to 1 in symtable_handle_comprehension * s/symtable_add_def/symtable_add_def_helper. Add symtable_add_def to handle grabbing st->st_cur and passing it to symtable_add_def_helper. This now allows us to call the original code from symtable_add_def by instead calling symtable_add_def_helper with a different ste. * Refactor symtable_record_directive to take lineno and col_offset as arguments instead of stmt_ty. This allows symtable_record_directive to be used for stmt_ty and expr_ty * Handle elevating scope for named expressions in comprehensions. * Handle error for usage of named expression inside a class block * Tests - No longer skip scope tests. 
Add additional scope tests * Cleanup - Update error message for named expression within a comprehension within a class. Update comments. Add assert for symtable_extend_namedexpr_scope to validate that we always find at least a ModuleScope if we don't find a Class or FunctionScope * Cleanup - Add missing case for NamedStore in expr_context_name. Remove unused var in set_namedexpr_content * Refactor - Consolidate set_context and set_namedexpr_context to reduce duplicated code. Special cases for named expressions are handled by checking if ctx is NamedStore * Cleanup - Add additional use cases for ast_for_namedexpr in usage comment. Fix multiple blank lines in test_named_expressions * Tests - Remove unnecessary test case. Renumber test case function names * Remove TargetScopeError for now. Will add back if needed * Cleanup - Small comment nit for consistency * Handle positional argument check with named expression * Add TargetScopeError exception definition. Add documentation for TargetScopeError in c-api docs. Throw TargetScopeError instead of SyntaxError when using a named expression in a comprehension within a class scope * Increase stack size for parser by 200. This is a minimal change (approx. 5kb) and should not have an impact on any systems. 
Update parser test to allow 99 nested levels again * Add TargetScopeError to exception_hierarchy.txt for test_baseexception.py_ * Tests - Major update for named expression tests, both in test_named_expressions and test_parser - Add test for TargetScopeError - Add tests for named expressions in comprehension scope and edge cases - Add tests for named expressions in function arguments (declarations and call sites) - Reorganize tests to group them more logically * Cleanup - Remove unnecessary comment * Cleanup - Comment nitpicks * Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0) - Add check for LHS types to detect a parenthesis then a name (see note) - Add test for this scenario - Update tests for changed error message for named assignment to a tuple (also, see note) Note: This caused issues with the previous error handling for named assignment to a LHS that contained an expression, such as a tuple. Thus, the check for the LHS of a named expression must be changed to be more specific if we wish to maintain the previous error messages * Cleanup - Wrap lines more strictly in test file * Revert "Explicitly disallow assignment expressions to a name inside parentheses, e.g.: ((x) := 0)" This reverts commit f1531400ca7d7a2d148830c8ac703f041740896d. * Add NEWS.d entry * Tests - Fix error in test_pickle.test_exceptions by adding TargetScopeError to list of exceptions * Tests - Update error message tests to reflect improved messaging convention (s/can't/cannot) * Remove cases that cannot be reached in compile.c. Small linting update. * Update Grammar/Tokens to add COLONEQUAL. Regenerate all files * Update TargetScopeError PRE_INIT and POST_INIT, as this was purposefully left out when fixing rebase conflicts * Add NamedStore back and regenerate files * Pass along line number and end col info for named expression * Simplify News entry * Fix compiler warning and explicity mark fallthrough
2019-01-25 07:49:56 +08:00
if (PyDict_SetItemString(d, "NamedExpr", (PyObject*)NamedExpr_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "BinOp", (PyObject*)BinOp_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "UnaryOp", (PyObject*)UnaryOp_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Lambda", (PyObject*)Lambda_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "IfExp", (PyObject*)IfExp_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Dict", (PyObject*)Dict_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Set", (PyObject*)Set_type) < 0) return NULL;
if (PyDict_SetItemString(d, "ListComp", (PyObject*)ListComp_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "SetComp", (PyObject*)SetComp_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "DictComp", (PyObject*)DictComp_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "GeneratorExp", (PyObject*)GeneratorExp_type) <
0) return NULL;
if (PyDict_SetItemString(d, "Await", (PyObject*)Await_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Yield", (PyObject*)Yield_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "YieldFrom", (PyObject*)YieldFrom_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Compare", (PyObject*)Compare_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Call", (PyObject*)Call_type) < 0) return NULL;
if (PyDict_SetItemString(d, "FormattedValue",
(PyObject*)FormattedValue_type) < 0) return NULL;
if (PyDict_SetItemString(d, "JoinedStr", (PyObject*)JoinedStr_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Constant", (PyObject*)Constant_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Attribute", (PyObject*)Attribute_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Subscript", (PyObject*)Subscript_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Starred", (PyObject*)Starred_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Name", (PyObject*)Name_type) < 0) return NULL;
if (PyDict_SetItemString(d, "List", (PyObject*)List_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Tuple", (PyObject*)Tuple_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "expr_context", (PyObject*)expr_context_type) <
0) return NULL;
if (PyDict_SetItemString(d, "Load", (PyObject*)Load_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Store", (PyObject*)Store_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Del", (PyObject*)Del_type) < 0) return NULL;
if (PyDict_SetItemString(d, "AugLoad", (PyObject*)AugLoad_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "AugStore", (PyObject*)AugStore_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Param", (PyObject*)Param_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "slice", (PyObject*)slice_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Slice", (PyObject*)Slice_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "ExtSlice", (PyObject*)ExtSlice_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Index", (PyObject*)Index_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "boolop", (PyObject*)boolop_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "And", (PyObject*)And_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Or", (PyObject*)Or_type) < 0) return NULL;
if (PyDict_SetItemString(d, "operator", (PyObject*)operator_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Add", (PyObject*)Add_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Sub", (PyObject*)Sub_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Mult", (PyObject*)Mult_type) < 0) return NULL;
if (PyDict_SetItemString(d, "MatMult", (PyObject*)MatMult_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Div", (PyObject*)Div_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Mod", (PyObject*)Mod_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Pow", (PyObject*)Pow_type) < 0) return NULL;
if (PyDict_SetItemString(d, "LShift", (PyObject*)LShift_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "RShift", (PyObject*)RShift_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "BitOr", (PyObject*)BitOr_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "BitXor", (PyObject*)BitXor_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "BitAnd", (PyObject*)BitAnd_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "FloorDiv", (PyObject*)FloorDiv_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "unaryop", (PyObject*)unaryop_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Invert", (PyObject*)Invert_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Not", (PyObject*)Not_type) < 0) return NULL;
if (PyDict_SetItemString(d, "UAdd", (PyObject*)UAdd_type) < 0) return NULL;
if (PyDict_SetItemString(d, "USub", (PyObject*)USub_type) < 0) return NULL;
if (PyDict_SetItemString(d, "cmpop", (PyObject*)cmpop_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Eq", (PyObject*)Eq_type) < 0) return NULL;
if (PyDict_SetItemString(d, "NotEq", (PyObject*)NotEq_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "Lt", (PyObject*)Lt_type) < 0) return NULL;
if (PyDict_SetItemString(d, "LtE", (PyObject*)LtE_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Gt", (PyObject*)Gt_type) < 0) return NULL;
if (PyDict_SetItemString(d, "GtE", (PyObject*)GtE_type) < 0) return NULL;
if (PyDict_SetItemString(d, "Is", (PyObject*)Is_type) < 0) return NULL;
if (PyDict_SetItemString(d, "IsNot", (PyObject*)IsNot_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "In", (PyObject*)In_type) < 0) return NULL;
if (PyDict_SetItemString(d, "NotIn", (PyObject*)NotIn_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "comprehension", (PyObject*)comprehension_type)
< 0) return NULL;
if (PyDict_SetItemString(d, "excepthandler", (PyObject*)excepthandler_type)
< 0) return NULL;
if (PyDict_SetItemString(d, "ExceptHandler", (PyObject*)ExceptHandler_type)
< 0) return NULL;
if (PyDict_SetItemString(d, "arguments", (PyObject*)arguments_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "arg", (PyObject*)arg_type) < 0) return NULL;
if (PyDict_SetItemString(d, "keyword", (PyObject*)keyword_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "alias", (PyObject*)alias_type) < 0) return
NULL;
if (PyDict_SetItemString(d, "withitem", (PyObject*)withitem_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "type_ignore", (PyObject*)type_ignore_type) <
0) return NULL;
if (PyDict_SetItemString(d, "TypeIgnore", (PyObject*)TypeIgnore_type) < 0)
return NULL;
return m;
}
/* Convert a mod_ty tree into its Python object form via ast2obj_mod().
   Returns NULL if init_types() reports failure. */
PyObject* PyAST_mod2obj(mod_ty t)
{
    return init_types() ? ast2obj_mod(t) : NULL;
}
/* Convert a Python-level AST object into a mod_ty tree allocated in arena.
 *
 * mode is 0 for "exec", 1 for "eval" and 2 for "single" input; it selects
 * the node type (Module, Expression or Interactive) that ast must be an
 * instance of.
 *
 * Returns the converted tree, or NULL with an exception set if the types
 * cannot be initialized, ast has the wrong type, or conversion fails.
 */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
{
    mod_ty res = NULL;
    PyObject *req_type[3];
    const char * const req_name[] = {"Module", "Expression", "Interactive"};
    int isinstance;

    assert(0 <= mode && mode <= 2);

    /* Call init_types() before reading the *_type globals, so they are not
       copied before initialization has been attempted. */
    if (!init_types())
        return NULL;

    req_type[0] = (PyObject*)Module_type;
    req_type[1] = (PyObject*)Expression_type;
    req_type[2] = (PyObject*)Interactive_type;

    isinstance = PyObject_IsInstance(ast, req_type[mode]);
    if (isinstance == -1)
        return NULL;
    if (!isinstance) {
        PyErr_Format(PyExc_TypeError, "expected %s node, got %.400s",
                     req_name[mode], Py_TYPE(ast)->tp_name);
        return NULL;
    }
    if (obj2ast_mod(ast, &res, arena) != 0)
        return NULL;
    return res;
}
/* Report whether obj is an instance of the AST base type.
   Returns the result of PyObject_IsInstance(), or -1 if init_types()
   reports failure. */
int PyAST_Check(PyObject* obj)
{
    if (init_types()) {
        return PyObject_IsInstance(obj, (PyObject*)&AST_type);
    }
    return -1;
}