bpo-46541: Discover the global strings. (gh-31346)

Instead of manually enumerating the global strings in generate_global_objects.py, we extrapolate the list from usage of _Py_ID() and _Py_STR() in the source files; string literals are picked up from the _Py_DECLARE_STR() declarations that accompany _Py_STR().

This is partly inspired by gh-31261.

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-14 17:36:51 -07:00 committed by GitHub
parent 278fdd3e3a
commit 12360aa159
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 103 additions and 274 deletions

View File

@ -28,13 +28,6 @@ extern "C" {
/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */
struct _Py_global_strings {
struct {
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(dot, ".")
STRUCT_FOR_STR(comma_sep, ", ")
STRUCT_FOR_STR(percent, "%")
STRUCT_FOR_STR(dbl_percent, "%%")
// "anonymous" labels
STRUCT_FOR_STR(anon_dictcomp, "<dictcomp>")
STRUCT_FOR_STR(anon_genexpr, "<genexpr>")
STRUCT_FOR_STR(anon_lambda, "<lambda>")
@ -42,7 +35,12 @@ struct _Py_global_strings {
STRUCT_FOR_STR(anon_module, "<module>")
STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
STRUCT_FOR_STR(anon_string, "<string>")
STRUCT_FOR_STR(comma_sep, ", ")
STRUCT_FOR_STR(dbl_percent, "%%")
STRUCT_FOR_STR(dot, ".")
STRUCT_FOR_STR(dot_locals, ".<locals>")
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(percent, "%")
} literals;
struct {
@ -330,6 +328,7 @@ struct _Py_global_strings {
#define _Py_STR(NAME) \
(_Py_SINGLETON(strings.literals._ ## NAME._ascii.ob_base))
#define _Py_DECLARE_STR(name, str)
#ifdef __cplusplus
}

View File

@ -644,12 +644,6 @@ extern "C" {
\
.strings = { \
.literals = { \
INIT_STR(empty, ""), \
INIT_STR(dot, "."), \
INIT_STR(comma_sep, ", "), \
INIT_STR(percent, "%"), \
INIT_STR(dbl_percent, "%%"), \
\
INIT_STR(anon_dictcomp, "<dictcomp>"), \
INIT_STR(anon_genexpr, "<genexpr>"), \
INIT_STR(anon_lambda, "<lambda>"), \
@ -657,7 +651,12 @@ extern "C" {
INIT_STR(anon_module, "<module>"), \
INIT_STR(anon_setcomp, "<setcomp>"), \
INIT_STR(anon_string, "<string>"), \
INIT_STR(comma_sep, ", "), \
INIT_STR(dbl_percent, "%%"), \
INIT_STR(dot, "."), \
INIT_STR(dot_locals, ".<locals>"), \
INIT_STR(empty, ""), \
INIT_STR(percent, "%"), \
}, \
.identifiers = { \
INIT_ID(Py_Repr), \

View File

@ -4546,6 +4546,7 @@ object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_DECREF(sorted_methods);
return NULL;
}
_Py_DECLARE_STR(comma_sep, ", ");
joined = PyUnicode_Join(&_Py_STR(comma_sep), sorted_methods);
method_count = PyObject_Length(sorted_methods);
Py_DECREF(sorted_methods);

View File

@ -458,12 +458,12 @@ proxy_checkref(PyWeakReference *proxy)
return res; \
}
#define WRAP_METHOD(method, special) \
#define WRAP_METHOD(method, SPECIAL) \
static PyObject * \
method(PyObject *proxy, PyObject *Py_UNUSED(ignored)) { \
UNWRAP(proxy); \
Py_INCREF(proxy); \
PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(special)); \
PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(SPECIAL)); \
Py_DECREF(proxy); \
return res; \
}

View File

@ -186,8 +186,8 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
return rc;
}
#define GET_WARNINGS_ATTR(interp, attr, try_import) \
get_warnings_attr(interp, &_Py_ID(attr), try_import)
#define GET_WARNINGS_ATTR(interp, ATTR, try_import) \
get_warnings_attr(interp, &_Py_ID(ATTR), try_import)
/*
Returns a new reference.

View File

@ -268,6 +268,8 @@ parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena)
PyObject *str = PyUnicode_Substring(fmt, start, pos);
/* str = str.replace('%%', '%') */
if (str && has_percents) {
_Py_DECLARE_STR(percent, "%");
_Py_DECLARE_STR(dbl_percent, "%%");
Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent),
&_Py_STR(percent), -1));
}

View File

@ -667,6 +667,7 @@ compiler_set_qualname(struct compiler *c)
|| parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
|| parent->u_scope_type == COMPILER_SCOPE_LAMBDA)
{
_Py_DECLARE_STR(dot_locals, ".<locals>");
base = PyUnicode_Concat(parent->u_qualname,
&_Py_STR(dot_locals));
if (base == NULL)
@ -2022,6 +2023,7 @@ compiler_mod(struct compiler *c, mod_ty mod)
{
PyCodeObject *co;
int addNone = 1;
_Py_DECLARE_STR(anon_module, "<module>");
if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
mod, 1)) {
return NULL;
@ -2876,6 +2878,7 @@ compiler_lambda(struct compiler *c, expr_ty e)
return 0;
}
_Py_DECLARE_STR(anon_lambda, "<lambda>");
if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
(void *)e, e->lineno)) {
return 0;
@ -5347,6 +5350,7 @@ static int
compiler_genexp(struct compiler *c, expr_ty e)
{
assert(e->kind == GeneratorExp_kind);
_Py_DECLARE_STR(anon_genexpr, "<genexpr>");
return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr),
e->v.GeneratorExp.generators,
e->v.GeneratorExp.elt, NULL);
@ -5356,6 +5360,7 @@ static int
compiler_listcomp(struct compiler *c, expr_ty e)
{
assert(e->kind == ListComp_kind);
_Py_DECLARE_STR(anon_listcomp, "<listcomp>");
return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp),
e->v.ListComp.generators,
e->v.ListComp.elt, NULL);
@ -5365,6 +5370,7 @@ static int
compiler_setcomp(struct compiler *c, expr_ty e)
{
assert(e->kind == SetComp_kind);
_Py_DECLARE_STR(anon_setcomp, "<setcomp>");
return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp),
e->v.SetComp.generators,
e->v.SetComp.elt, NULL);
@ -5375,6 +5381,7 @@ static int
compiler_dictcomp(struct compiler *c, expr_ty e)
{
assert(e->kind == DictComp_kind);
_Py_DECLARE_STR(anon_dictcomp, "<dictcomp>");
return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp),
e->v.DictComp.generators,
e->v.DictComp.key, e->v.DictComp.value);

View File

@ -515,6 +515,7 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename,
goto finally;
if (v == Py_None) {
Py_DECREF(v);
_Py_DECLARE_STR(anon_string, "<string>");
*filename = &_Py_STR(anon_string);
Py_INCREF(*filename);
}
@ -1562,6 +1563,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
if (arena == NULL)
return NULL;
_Py_DECLARE_STR(anon_string, "<string>");
mod = _PyParser_ASTFromString(
str, &_Py_STR(anon_string), start, flags, arena);

View File

@ -13,298 +13,112 @@ INTERNAL = os.path.join(ROOT, 'Include', 'internal')
STRING_LITERALS = {
'empty': '',
'dot': '.',
'comma_sep': ', ',
'percent': '%',
'dbl_percent': '%%',
'"anonymous" labels': None,
'anon_dictcomp': '<dictcomp>',
'anon_genexpr': '<genexpr>',
'anon_lambda': '<lambda>',
'anon_listcomp': '<listcomp>',
'anon_module': '<module>',
'anon_setcomp': '<setcomp>',
'anon_string': '<string>',
'dot_locals': '.<locals>',
}
IGNORED = {
'ACTION', # Python/_warnings.c
'ATTR', # Python/_warnings.c and Objects/funcobject.c
'DUNDER', # Objects/typeobject.c
'RDUNDER', # Objects/typeobject.c
'SPECIAL', # Objects/weakrefobject.c
}
IDENTIFIERS = [
'Py_Repr',
'TextIOWrapper',
# from ADD() Python/_warnings.c
'default',
'ignore',
# from GET_WARNINGS_ATTR() in Python/_warnings.c
'WarningMessage',
'_',
'__IOBase_closed',
'__abc_tpflags__',
'__abs__',
'__abstractmethods__',
'__add__',
'__aenter__',
'__aexit__',
'__aiter__',
'__all__',
'__and__',
'__anext__',
'__annotations__',
'__args__',
'__await__',
'__bases__',
'__bool__',
'__build_class__',
'__builtins__',
'_showwarnmsg',
'_warn_unawaited_coroutine',
'defaultaction',
'filters',
'onceregistry',
# from WRAP_METHOD() in Objects/weakrefobject.c
'__bytes__',
'__call__',
'__cantrace__',
'__class__',
'__class_getitem__',
'__classcell__',
'__complex__',
'__contains__',
'__copy__',
'__del__',
'__delattr__',
'__delete__',
'__delitem__',
'__dict__',
'__dir__',
'__divmod__',
'__reversed__',
# from COPY_ATTR() in Objects/funcobject.c
'__module__',
'__name__',
'__qualname__',
'__doc__',
'__enter__',
'__eq__',
'__exit__',
'__file__',
'__annotations__',
# from SLOT* in Objects/typeobject.c
'__abs__',
'__add__',
'__and__',
'__divmod__',
'__float__',
'__floordiv__',
'__format__',
'__fspath__',
'__ge__',
'__get__',
'__getattr__',
'__getattribute__',
'__getinitargs__',
'__getitem__',
'__getnewargs__',
'__getnewargs_ex__',
'__getstate__',
'__gt__',
'__hash__',
'__iadd__',
'__iand__',
'__ifloordiv__',
'__ilshift__',
'__imatmul__',
'__imod__',
'__import__',
'__imul__',
'__index__',
'__init__',
'__init_subclass__',
'__instancecheck__',
'__int__',
'__invert__',
'__ior__',
'__ipow__',
'__irshift__',
'__isabstractmethod__',
'__isub__',
'__iter__',
'__itruediv__',
'__ixor__',
'__le__',
'__len__',
'__length_hint__',
'__loader__',
'__lshift__',
'__lt__',
'__ltrace__',
'__main__',
'__matmul__',
'__missing__',
'__mod__',
'__module__',
'__mro_entries__',
'__mul__',
'__name__',
'__ne__',
'__neg__',
'__new__',
'__newobj__',
'__newobj_ex__',
'__next__',
'__note__',
'__or__',
'__origin__',
'__package__',
'__parameters__',
'__path__',
'__pos__',
'__pow__',
'__prepare__',
'__qualname__',
'__radd__',
'__rand__',
'__rdivmod__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__reversed__',
'__rfloordiv__',
'__rlshift__',
'__rmatmul__',
'__rmod__',
'__rmul__',
'__ror__',
'__round__',
'__rpow__',
'__rrshift__',
'__rshift__',
'__rsub__',
'__rtruediv__',
'__rxor__',
'__set__',
'__set_name__',
'__setattr__',
'__setitem__',
'__setstate__',
'__sizeof__',
'__slotnames__',
'__slots__',
'__spec__',
'__str__',
'__sub__',
'__subclasscheck__',
'__subclasshook__',
'__truediv__',
'__trunc__',
'__warningregistry__',
'__weakref__',
'__xor__',
'_abc_impl',
'_blksize',
'_dealloc_warn',
'_finalizing',
'_find_and_load',
'_fix_up_module',
'_get_sourcefile',
'_handle_fromlist',
'_initializing',
'_is_text_encoding',
'_lock_unlock_module',
'_showwarnmsg',
'_shutdown',
'_slotnames',
'_strptime_time',
'_uninitialized_submodules',
'_warn_unawaited_coroutine',
'_xoptions',
'add',
'append',
'big',
'buffer',
'builtins',
'clear',
'close',
'code',
'copy',
'copyreg',
'decode',
'default',
'defaultaction',
'difference_update',
'dispatch_table',
'displayhook',
'enable',
'encoding',
'end_lineno',
'end_offset',
'errors',
'excepthook',
'extend',
'filename',
'fileno',
'fillvalue',
'filters',
'find_class',
'flush',
'get',
'get_source',
'getattr',
'ignore',
'importlib',
'intersection',
'isatty',
'items',
'iter',
'keys',
'last_traceback',
'last_type',
'last_value',
'latin1',
'lineno',
'little',
'match',
'metaclass',
'mode',
'modules',
'mro',
'msg',
'n_fields',
'n_sequence_fields',
'n_unnamed_fields',
'name',
'obj',
'offset',
'onceregistry',
'open',
'parent',
'partial',
'path',
'peek',
'persistent_id',
'persistent_load',
'print_file_and_line',
'ps1',
'ps2',
'raw',
'read',
'read1',
'readable',
'readall',
'readinto',
'readinto1',
'readline',
'reducer_override',
'reload',
'replace',
'reset',
'return',
'reversed',
'seek',
'seekable',
'send',
'setstate',
'sort',
'stderr',
'stdin',
'stdout',
'strict',
'symmetric_difference_update',
'tell',
'text',
'threading',
'throw',
'unraisablehook',
'values',
'version',
'warnings',
'warnoptions',
'writable',
'write',
'zipimporter',
]
#######################################
# helpers
def iter_global_strings():
    """Scan the C sources under ROOT for global-string usages.

    Walks every ``.c`` / ``.h`` file below ROOT, skipping any directory
    whose path relative to ROOT starts with ``Include``, and yields one
    5-tuple ``(name, string, filename, lno, line)`` per match:

    * for ``_Py_ID(NAME)``: ``string`` is ``None``;
    * for ``_Py_DECLARE_STR(NAME, "literal")``: ``string`` is the text
      between the double quotes.

    ``lno`` is the 1-based line number and ``line`` is the raw source line.
    Within a line, all ``_Py_ID`` matches are yielded before any
    ``_Py_DECLARE_STR`` matches.
    """
    id_pattern = re.compile(r'\b_Py_ID\((\w+)\)')
    decl_pattern = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')

    for dirpath, _subdirs, basenames in os.walk(ROOT):
        # NOTE(review): presumably excluded because the headers there hold
        # the macro definitions and generated tables -- confirm.
        if os.path.relpath(dirpath, ROOT).startswith('Include'):
            continue
        c_sources = (b for b in basenames if b.endswith(('.c', '.h')))
        for basename in c_sources:
            filename = os.path.join(dirpath, basename)
            with open(filename, encoding='utf-8') as infile:
                for lno, line in enumerate(infile, 1):
                    for found in id_pattern.finditer(line):
                        yield found.group(1), None, filename, lno, line
                    for found in decl_pattern.finditer(line):
                        varname, string = found.group(1, 2)
                        yield varname, string, filename, lno, line
def iter_to_marker(lines, marker):
for line in lines:
if line.rstrip() == marker:
@ -354,7 +168,7 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj
END = '/* End auto-generated code */'
def generate_global_strings():
def generate_global_strings(identifiers, strings):
filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
# Read the non-generated part of the file.
@ -371,22 +185,18 @@ def generate_global_strings():
printer.write(START)
with printer.block('struct _Py_global_strings', ';'):
with printer.block('struct', ' literals;'):
for name, literal in STRING_LITERALS.items():
if literal is None:
outfile.write('\n')
printer.write(f'// {name}')
else:
for name, literal in sorted(strings.items()):
printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
outfile.write('\n')
with printer.block('struct', ' identifiers;'):
for name in sorted(IDENTIFIERS):
for name in sorted(identifiers):
assert name.isidentifier(), name
printer.write(f'STRUCT_FOR_ID({name})')
printer.write(END)
printer.write(after)
def generate_runtime_init():
def generate_runtime_init(identifiers, strings):
# First get some info from the declarations.
nsmallposints = None
nsmallnegints = None
@ -432,13 +242,10 @@ def generate_runtime_init():
# Global strings.
with printer.block('.strings =', ','):
with printer.block('.literals =', ','):
for name, literal in STRING_LITERALS.items():
if literal is None:
printer.write('')
else:
for name, literal in sorted(strings.items()):
printer.write(f'INIT_STR({name}, "{literal}"),')
with printer.block('.identifiers =', ','):
for name in sorted(IDENTIFIERS):
for name in sorted(identifiers):
assert name.isidentifier(), name
printer.write(f'INIT_ID({name}),')
printer.write(END)
@ -507,9 +314,9 @@ TYPESLOTS_RE = re.compile(r'''
)
''', re.VERBOSE)
def check_orphan_strings():
def check_orphan_strings(identifiers):
literals = set(n for n, s in STRING_LITERALS.items() if s)
identifiers = set(IDENTIFIERS)
identifiers = set(identifiers)
files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
for i, filename in enumerate(files, start=1):
print('.', end='')
@ -586,11 +393,23 @@ def check_orphan_strings():
# the script
def main(*, check=False) -> None:
    """Regenerate the global-strings header and runtime-init code.

    Starts from the manually maintained IDENTIFIERS and STRING_LITERALS
    tables and merges in everything reported by iter_global_strings():

    * an identifier (``string is None``) is added unless its name is in
      IGNORED;
    * a string literal is added under its name; if that name is already
      known with a different value, ValueError is raised.

    The merged collections are then passed to generate_global_strings()
    and generate_runtime_init().  When ``check`` is true,
    check_orphan_strings() is run on the merged identifiers afterwards.
    """
    identifiers = set(IDENTIFIERS)
    strings = dict(STRING_LITERALS)
    for name, string, _filename, _lno, _line in iter_global_strings():
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
        else:
            if name not in strings:
                strings[name] = string
            elif string != strings[name]:
                # The same name must always map to the same literal.
                raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r})')

    generate_global_strings(identifiers, strings)
    generate_runtime_init(identifiers, strings)

    if check:
        check_orphan_strings(identifiers)
if __name__ == '__main__':