From 12360aa159c42c7798fd14225d271e6fd84db7eb Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 14 Feb 2022 17:36:51 -0700 Subject: [PATCH] bpo-46541: Discover the global strings. (gh-31346) Instead of manually enumerating the global strings in generate_global_objects.py, we extrapolate the list from usage of _Py_ID() and _Py_DECLARE_STR() (a no-op marker macro added next to _Py_STR() uses) in the source files. This is partly inspired by gh-31261. https://bugs.python.org/issue46541 --- Include/internal/pycore_global_strings.h | 13 +- Include/internal/pycore_runtime_init.h | 11 +- Objects/typeobject.c | 1 + Objects/weakrefobject.c | 4 +- Python/_warnings.c | 4 +- Python/ast_opt.c | 2 + Python/compile.c | 7 + Python/pythonrun.c | 2 + Tools/scripts/generate_global_objects.py | 333 ++++++----------------- 9 files changed, 103 insertions(+), 274 deletions(-) diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 17241b3a3dd..aa597bc8281 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -28,13 +28,6 @@ extern "C" { /* The following is auto-generated by Tools/scripts/generate_global_objects.py.
*/ struct _Py_global_strings { struct { - STRUCT_FOR_STR(empty, "") - STRUCT_FOR_STR(dot, ".") - STRUCT_FOR_STR(comma_sep, ", ") - STRUCT_FOR_STR(percent, "%") - STRUCT_FOR_STR(dbl_percent, "%%") - - // "anonymous" labels STRUCT_FOR_STR(anon_dictcomp, "") STRUCT_FOR_STR(anon_genexpr, "") STRUCT_FOR_STR(anon_lambda, "") @@ -42,7 +35,12 @@ struct _Py_global_strings { STRUCT_FOR_STR(anon_module, "") STRUCT_FOR_STR(anon_setcomp, "") STRUCT_FOR_STR(anon_string, "") + STRUCT_FOR_STR(comma_sep, ", ") + STRUCT_FOR_STR(dbl_percent, "%%") + STRUCT_FOR_STR(dot, ".") STRUCT_FOR_STR(dot_locals, ".") + STRUCT_FOR_STR(empty, "") + STRUCT_FOR_STR(percent, "%") } literals; struct { @@ -330,6 +328,7 @@ struct _Py_global_strings { #define _Py_STR(NAME) \ (_Py_SINGLETON(strings.literals._ ## NAME._ascii.ob_base)) +#define _Py_DECLARE_STR(name, str) #ifdef __cplusplus } diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 045ae5d2835..04c1e671235 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -644,12 +644,6 @@ extern "C" { \ .strings = { \ .literals = { \ - INIT_STR(empty, ""), \ - INIT_STR(dot, "."), \ - INIT_STR(comma_sep, ", "), \ - INIT_STR(percent, "%"), \ - INIT_STR(dbl_percent, "%%"), \ - \ INIT_STR(anon_dictcomp, ""), \ INIT_STR(anon_genexpr, ""), \ INIT_STR(anon_lambda, ""), \ @@ -657,7 +651,12 @@ extern "C" { INIT_STR(anon_module, ""), \ INIT_STR(anon_setcomp, ""), \ INIT_STR(anon_string, ""), \ + INIT_STR(comma_sep, ", "), \ + INIT_STR(dbl_percent, "%%"), \ + INIT_STR(dot, "."), \ INIT_STR(dot_locals, "."), \ + INIT_STR(empty, ""), \ + INIT_STR(percent, "%"), \ }, \ .identifiers = { \ INIT_ID(Py_Repr), \ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 3f8f36a9c46..8c4901119de 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4546,6 +4546,7 @@ object_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(sorted_methods); return NULL; } + 
_Py_DECLARE_STR(comma_sep, ", "); joined = PyUnicode_Join(&_Py_STR(comma_sep), sorted_methods); method_count = PyObject_Length(sorted_methods); Py_DECREF(sorted_methods); diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index 71dfa640ebf..1712533a39d 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -458,12 +458,12 @@ proxy_checkref(PyWeakReference *proxy) return res; \ } -#define WRAP_METHOD(method, special) \ +#define WRAP_METHOD(method, SPECIAL) \ static PyObject * \ method(PyObject *proxy, PyObject *Py_UNUSED(ignored)) { \ UNWRAP(proxy); \ Py_INCREF(proxy); \ - PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(special)); \ + PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(SPECIAL)); \ Py_DECREF(proxy); \ return res; \ } diff --git a/Python/_warnings.c b/Python/_warnings.c index a47e5fef686..03e6ffcee0a 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -186,8 +186,8 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg) return rc; } -#define GET_WARNINGS_ATTR(interp, attr, try_import) \ - get_warnings_attr(interp, &_Py_ID(attr), try_import) +#define GET_WARNINGS_ATTR(interp, ATTR, try_import) \ + get_warnings_attr(interp, &_Py_ID(ATTR), try_import) /* Returns a new reference. 
diff --git a/Python/ast_opt.c b/Python/ast_opt.c index 29113706497..77ed29d0cdd 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -268,6 +268,8 @@ parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena) PyObject *str = PyUnicode_Substring(fmt, start, pos); /* str = str.replace('%%', '%') */ if (str && has_percents) { + _Py_DECLARE_STR(percent, "%"); + _Py_DECLARE_STR(dbl_percent, "%%"); Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent), &_Py_STR(percent), -1)); } diff --git a/Python/compile.c b/Python/compile.c index ac4960b5df3..1cf20d3a36a 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -667,6 +667,7 @@ compiler_set_qualname(struct compiler *c) || parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION || parent->u_scope_type == COMPILER_SCOPE_LAMBDA) { + _Py_DECLARE_STR(dot_locals, "."); base = PyUnicode_Concat(parent->u_qualname, &_Py_STR(dot_locals)); if (base == NULL) @@ -2022,6 +2023,7 @@ compiler_mod(struct compiler *c, mod_ty mod) { PyCodeObject *co; int addNone = 1; + _Py_DECLARE_STR(anon_module, ""); if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE, mod, 1)) { return NULL; @@ -2876,6 +2878,7 @@ compiler_lambda(struct compiler *c, expr_ty e) return 0; } + _Py_DECLARE_STR(anon_lambda, ""); if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA, (void *)e, e->lineno)) { return 0; @@ -5347,6 +5350,7 @@ static int compiler_genexp(struct compiler *c, expr_ty e) { assert(e->kind == GeneratorExp_kind); + _Py_DECLARE_STR(anon_genexpr, ""); return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr), e->v.GeneratorExp.generators, e->v.GeneratorExp.elt, NULL); @@ -5356,6 +5360,7 @@ static int compiler_listcomp(struct compiler *c, expr_ty e) { assert(e->kind == ListComp_kind); + _Py_DECLARE_STR(anon_listcomp, ""); return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp), e->v.ListComp.generators, e->v.ListComp.elt, NULL); @@ -5365,6 +5370,7 @@ static int 
compiler_setcomp(struct compiler *c, expr_ty e) { assert(e->kind == SetComp_kind); + _Py_DECLARE_STR(anon_setcomp, ""); return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp), e->v.SetComp.generators, e->v.SetComp.elt, NULL); @@ -5375,6 +5381,7 @@ static int compiler_dictcomp(struct compiler *c, expr_ty e) { assert(e->kind == DictComp_kind); + _Py_DECLARE_STR(anon_dictcomp, ""); return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp), e->v.DictComp.generators, e->v.DictComp.key, e->v.DictComp.value); diff --git a/Python/pythonrun.c b/Python/pythonrun.c index b34a2239182..38ca952838a 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -515,6 +515,7 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename, goto finally; if (v == Py_None) { Py_DECREF(v); + _Py_DECLARE_STR(anon_string, ""); *filename = &_Py_STR(anon_string); Py_INCREF(*filename); } @@ -1562,6 +1563,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals, if (arena == NULL) return NULL; + _Py_DECLARE_STR(anon_string, ""); mod = _PyParser_ASTFromString( str, &_Py_STR(anon_string), start, flags, arena); diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index 73068894d97..e989f3c086f 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -13,298 +13,112 @@ INTERNAL = os.path.join(ROOT, 'Include', 'internal') STRING_LITERALS = { 'empty': '', 'dot': '.', - 'comma_sep': ', ', - 'percent': '%', - 'dbl_percent': '%%', - - '"anonymous" labels': None, - 'anon_dictcomp': '', - 'anon_genexpr': '', - 'anon_lambda': '', - 'anon_listcomp': '', - 'anon_module': '', - 'anon_setcomp': '', - 'anon_string': '', - 'dot_locals': '.', +} +IGNORED = { + 'ACTION', # Python/_warnings.c + 'ATTR', # Python/_warnings.c and Objects/funcobject.c + 'DUNDER', # Objects/typeobject.c + 'RDUNDER', # Objects/typeobject.c + 'SPECIAL', # Objects/weakrefobject.c } 
IDENTIFIERS = [ - 'Py_Repr', - 'TextIOWrapper', + # from ADD() Python/_warnings.c + 'default', + 'ignore', + + # from GET_WARNINGS_ATTR() in Python/_warnings.c 'WarningMessage', - '_', - '__IOBase_closed', - '__abc_tpflags__', - '__abs__', - '__abstractmethods__', - '__add__', - '__aenter__', - '__aexit__', - '__aiter__', - '__all__', - '__and__', - '__anext__', - '__annotations__', - '__args__', - '__await__', - '__bases__', - '__bool__', - '__build_class__', - '__builtins__', + '_showwarnmsg', + '_warn_unawaited_coroutine', + 'defaultaction', + 'filters', + 'onceregistry', + + # from WRAP_METHOD() in Objects/weakrefobject.c '__bytes__', - '__call__', - '__cantrace__', - '__class__', - '__class_getitem__', - '__classcell__', - '__complex__', - '__contains__', - '__copy__', - '__del__', - '__delattr__', - '__delete__', - '__delitem__', - '__dict__', - '__dir__', - '__divmod__', + '__reversed__', + + # from COPY_ATTR() in Objects/funcobject.c + '__module__', + '__name__', + '__qualname__', '__doc__', - '__enter__', - '__eq__', - '__exit__', - '__file__', + '__annotations__', + + # from SLOT* in Objects/typeobject.c + '__abs__', + '__add__', + '__and__', + '__divmod__', '__float__', '__floordiv__', - '__format__', - '__fspath__', - '__ge__', - '__get__', - '__getattr__', - '__getattribute__', - '__getinitargs__', '__getitem__', - '__getnewargs__', - '__getnewargs_ex__', - '__getstate__', - '__gt__', - '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', - '__import__', '__imul__', - '__index__', - '__init__', - '__init_subclass__', - '__instancecheck__', '__int__', '__invert__', '__ior__', - '__ipow__', '__irshift__', - '__isabstractmethod__', '__isub__', - '__iter__', '__itruediv__', '__ixor__', - '__le__', - '__len__', - '__length_hint__', - '__loader__', '__lshift__', - '__lt__', - '__ltrace__', - '__main__', '__matmul__', - '__missing__', '__mod__', - '__module__', - '__mro_entries__', '__mul__', - '__name__', - '__ne__', 
'__neg__', - '__new__', - '__newobj__', - '__newobj_ex__', - '__next__', - '__note__', '__or__', - '__origin__', - '__package__', - '__parameters__', - '__path__', '__pos__', '__pow__', - '__prepare__', - '__qualname__', '__radd__', '__rand__', '__rdivmod__', - '__reduce__', - '__reduce_ex__', - '__repr__', - '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__', '__rmod__', '__rmul__', '__ror__', - '__round__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__', '__rxor__', - '__set__', - '__set_name__', - '__setattr__', - '__setitem__', - '__setstate__', - '__sizeof__', - '__slotnames__', - '__slots__', - '__spec__', '__str__', '__sub__', - '__subclasscheck__', - '__subclasshook__', '__truediv__', - '__trunc__', - '__warningregistry__', - '__weakref__', '__xor__', - '_abc_impl', - '_blksize', - '_dealloc_warn', - '_finalizing', - '_find_and_load', - '_fix_up_module', - '_get_sourcefile', - '_handle_fromlist', - '_initializing', - '_is_text_encoding', - '_lock_unlock_module', - '_showwarnmsg', - '_shutdown', - '_slotnames', - '_strptime_time', - '_uninitialized_submodules', - '_warn_unawaited_coroutine', - '_xoptions', - 'add', - 'append', - 'big', - 'buffer', - 'builtins', - 'clear', - 'close', - 'code', - 'copy', - 'copyreg', - 'decode', - 'default', - 'defaultaction', - 'difference_update', - 'dispatch_table', - 'displayhook', - 'enable', - 'encoding', - 'end_lineno', - 'end_offset', - 'errors', - 'excepthook', - 'extend', - 'filename', - 'fileno', - 'fillvalue', - 'filters', - 'find_class', - 'flush', - 'get', - 'get_source', - 'getattr', - 'ignore', - 'importlib', - 'intersection', - 'isatty', - 'items', - 'iter', - 'keys', - 'last_traceback', - 'last_type', - 'last_value', - 'latin1', - 'lineno', - 'little', - 'match', - 'metaclass', - 'mode', - 'modules', - 'mro', - 'msg', - 'n_fields', - 'n_sequence_fields', - 'n_unnamed_fields', - 'name', - 'obj', - 'offset', - 'onceregistry', - 'open', - 'parent', - 'partial', - 'path', - 'peek', 
- 'persistent_id', - 'persistent_load', - 'print_file_and_line', - 'ps1', - 'ps2', - 'raw', - 'read', - 'read1', - 'readable', - 'readall', - 'readinto', - 'readinto1', - 'readline', - 'reducer_override', - 'reload', - 'replace', - 'reset', - 'return', - 'reversed', - 'seek', - 'seekable', - 'send', - 'setstate', - 'sort', - 'stderr', - 'stdin', - 'stdout', - 'strict', - 'symmetric_difference_update', - 'tell', - 'text', - 'threading', - 'throw', - 'unraisablehook', - 'values', - 'version', - 'warnings', - 'warnoptions', - 'writable', - 'write', - 'zipimporter', ] ####################################### # helpers +def iter_global_strings(): + id_regex = re.compile(r'\b_Py_ID\((\w+)\)') + str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)') + for dirname, _, files in os.walk(ROOT): + if os.path.relpath(dirname, ROOT).startswith('Include'): + continue + for name in files: + if not name.endswith(('.c', '.h')): + continue + filename = os.path.join(dirname, name) + with open(os.path.join(filename), encoding='utf-8') as infile: + for lno, line in enumerate(infile, 1): + for m in id_regex.finditer(line): + identifier, = m.groups() + yield identifier, None, filename, lno, line + for m in str_regex.finditer(line): + varname, string = m.groups() + yield varname, string, filename, lno, line + def iter_to_marker(lines, marker): for line in lines: if line.rstrip() == marker: @@ -354,7 +168,7 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj END = '/* End auto-generated code */' -def generate_global_strings(): +def generate_global_strings(identifiers, strings): filename = os.path.join(INTERNAL, 'pycore_global_strings.h') # Read the non-generated part of the file. 
@@ -371,22 +185,18 @@ def generate_global_strings(): printer.write(START) with printer.block('struct _Py_global_strings', ';'): with printer.block('struct', ' literals;'): - for name, literal in STRING_LITERALS.items(): - if literal is None: - outfile.write('\n') - printer.write(f'// {name}') - else: - printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') + for name, literal in sorted(strings.items()): + printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') outfile.write('\n') with printer.block('struct', ' identifiers;'): - for name in sorted(IDENTIFIERS): + for name in sorted(identifiers): assert name.isidentifier(), name printer.write(f'STRUCT_FOR_ID({name})') printer.write(END) printer.write(after) -def generate_runtime_init(): +def generate_runtime_init(identifiers, strings): # First get some info from the declarations. nsmallposints = None nsmallnegints = None @@ -432,13 +242,10 @@ def generate_runtime_init(): # Global strings. with printer.block('.strings =', ','): with printer.block('.literals =', ','): - for name, literal in STRING_LITERALS.items(): - if literal is None: - printer.write('') - else: - printer.write(f'INIT_STR({name}, "{literal}"),') + for name, literal in sorted(strings.items()): + printer.write(f'INIT_STR({name}, "{literal}"),') with printer.block('.identifiers =', ','): - for name in sorted(IDENTIFIERS): + for name in sorted(identifiers): assert name.isidentifier(), name printer.write(f'INIT_ID({name}),') printer.write(END) @@ -507,9 +314,9 @@ TYPESLOTS_RE = re.compile(r''' ) ''', re.VERBOSE) -def check_orphan_strings(): +def check_orphan_strings(identifiers): literals = set(n for n, s in STRING_LITERALS.items() if s) - identifiers = set(IDENTIFIERS) + identifiers = set(identifiers) files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True) for i, filename in enumerate(files, start=1): print('.', end='') @@ -586,11 +393,23 @@ def check_orphan_strings(): # the script def main(*, check=False) -> None: - generate_global_strings()
- generate_runtime_init() + identifiers = set(IDENTIFIERS) + strings = dict(STRING_LITERALS) + for name, string, filename, lno, _ in iter_global_strings(): + if string is None: + if name not in IGNORED: + identifiers.add(name) + else: + if name not in strings: + strings[name] = string + elif string != strings[name]: + raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r})') + + generate_global_strings(identifiers, strings) + generate_runtime_init(identifiers, strings) if check: - check_orphan_strings() + check_orphan_strings(identifiers) if __name__ == '__main__':