mirror of
https://github.com/python/cpython.git
synced 2024-12-01 05:45:40 +08:00
da5727a120
Remove the token.h header file. There was never any public tokenizer C API. The token.h header file was only designed to be used by Python internals. Move Include/token.h to Include/internal/pycore_token.h. Including this header file now requires that the Py_BUILD_CORE macro is defined. It no longer checks for the Py_LIMITED_API macro. Rename functions: * PyToken_OneChar() => _PyToken_OneChar() * PyToken_TwoChars() => _PyToken_TwoChars() * PyToken_ThreeChars() => _PyToken_ThreeChars()
276 lines
6.9 KiB
Python
Executable File
276 lines
6.9 KiB
Python
Executable File
#! /usr/bin/env python3
|
|
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/internal/pycore_token.h
#   Parser/token.c
#   Lib/token.py
|
|
|
|
|
|
# Boundary between terminal and non-terminal symbol numbers: the generated
# ISTERMINAL()/ISNONTERMINAL() helpers compare against this value.  It is
# written verbatim into the generated C header and Python module.
NT_OFFSET = 256
|
|
|
|
def load_tokens(path):
    """Parse a token description file and return its contents.

    Every non-empty line (after ``#`` comments are removed) is split on
    whitespace.  The first field is the token name; the optional second
    field is a quoted Python string literal giving the token's exact
    spelling.  A token's numeric value is its zero-based position among
    the token lines.

    Returns a tuple ``(tok_names, ERRORTOKEN, string_to_tok)``:

    * ``tok_names`` -- list of token names, indexed by token value;
    * ``ERRORTOKEN`` -- value of the token named ``ERRORTOKEN``, or None
      if the file does not define it;
    * ``string_to_tok`` -- dict mapping each literal spelling to its
      token value.

    Raises ValueError (or SyntaxError) if a second field is not a valid
    Python literal.
    """
    # Imported here so the block does not depend on the file's import list.
    import ast

    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            # Keep only the text before the first '#', without padding.
            line = line.partition('#')[0].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            if len(fields) > 1:
                # The spelling is written as a quoted literal.  Use
                # literal_eval() rather than eval() so that the token file
                # can never execute arbitrary expressions.
                string_to_tok[ast.literal_eval(fields[1])] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
|
|
|
|
|
|
def update_file(file, content):
    """Store *content* in *file* unless the file already contains it.

    Returns False when the existing file text equals *content* (nothing is
    written), True when the file was created or overwritten.
    """
    try:
        with open(file, 'r') as existing:
            unchanged = existing.read() == content
    except (OSError, ValueError):
        # A file that cannot be opened or read is simply rewritten.
        unchanged = False
    if unchanged:
        return False
    with open(file, 'w') as target:
        target.write(content)
    return True
|
|
|
|
|
|
token_h_template = """\
|
|
/* Auto-generated by Tools/scripts/generate_token.py */
|
|
|
|
/* Token types */
|
|
#ifndef Py_INTERNAL_TOKEN_H
|
|
#define Py_INTERNAL_TOKEN_H
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#ifndef Py_BUILD_CORE
|
|
# error "this header requires Py_BUILD_CORE define"
|
|
#endif
|
|
|
|
#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
|
|
|
|
%s\
|
|
#define N_TOKENS %d
|
|
#define NT_OFFSET %d
|
|
|
|
/* Special definitions for cooperation with parser */
|
|
|
|
#define ISTERMINAL(x) ((x) < NT_OFFSET)
|
|
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
|
|
#define ISEOF(x) ((x) == ENDMARKER)
|
|
#define ISWHITESPACE(x) ((x) == ENDMARKER || \\
|
|
(x) == NEWLINE || \\
|
|
(x) == INDENT || \\
|
|
(x) == DEDENT)
|
|
|
|
|
|
// Symbols exported for test_peg_generator
|
|
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
|
|
PyAPI_FUNC(int) _PyToken_OneChar(int);
|
|
PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
|
|
PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
#endif // !Py_INTERNAL_TOKEN_H
|
|
"""
|
|
|
|
def make_h(infile, outfile='Include/internal/pycore_token.h'):
    """Regenerate the C token header *outfile* from the token file *infile*.

    Prints a notice only when the header's contents actually changed.
    """
    tok_names, ERRORTOKEN, _ = load_tokens(infile)

    # Only tokens up to and including ERRORTOKEN get a #define; N_TOKENS
    # still counts every token in the file.
    define_block = ''.join(
        "#define %-15s %d\n" % (name, value)
        for value, name in enumerate(tok_names[:ERRORTOKEN + 1])
    )
    header = token_h_template % (define_block, len(tok_names), NT_OFFSET)

    if update_file(outfile, header):
        print("%s regenerated from %s" % (outfile, infile))
|
|
|
|
|
|
token_c_template = """\
|
|
/* Auto-generated by Tools/scripts/generate_token.py */
|
|
|
|
#include "Python.h"
|
|
#include "pycore_token.h"
|
|
|
|
/* Token names */
|
|
|
|
const char * const _PyParser_TokenNames[] = {
|
|
%s\
|
|
};
|
|
|
|
/* Return the token corresponding to a single character */
|
|
|
|
int
|
|
_PyToken_OneChar(int c1)
|
|
{
|
|
%s\
|
|
return OP;
|
|
}
|
|
|
|
int
|
|
_PyToken_TwoChars(int c1, int c2)
|
|
{
|
|
%s\
|
|
return OP;
|
|
}
|
|
|
|
int
|
|
_PyToken_ThreeChars(int c1, int c2, int c3)
|
|
{
|
|
%s\
|
|
return OP;
|
|
}
|
|
"""
|
|
|
|
def generate_chars_to_token(mapping, n=1):
    """Return C ``switch`` source text that dispatches on character ``c<n>``.

    *mapping* maps a single character either to a token name (emitted as a
    ``return``) or to a nested mapping for the next character (emitted as
    a nested switch on ``c<n+1>`` followed by ``break``).  Cases are
    written in sorted character order.
    """
    pad = ' ' * n
    pieces = [pad, 'switch (c%d) {\n' % (n,)]
    for char in sorted(mapping):
        target = mapping[char]
        if not isinstance(target, dict):
            # Leaf: this character completes a token.
            pieces.append(pad + "case '%s': return %s;\n" % (char, target))
            continue
        # Inner node: descend to the switch on the following character.
        pieces.append(pad + "case '%s':\n" % (char,))
        pieces.append(generate_chars_to_token(target, n + 1))
        pieces.append(pad + ' break;\n')
    pieces.append(pad + '}\n')
    return ''.join(pieces)
|
|
|
|
def make_c(infile, outfile='Parser/token.c'):
    """Regenerate the C token source *outfile* from the token file *infile*.

    Emits the token-name table and the character-dispatch functions for
    one-, two- and three-character tokens.  Prints a notice only when the
    file's contents actually changed.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is given the same token value as '!='.
    string_to_tok['<>'] = string_to_tok['!=']

    # chars_to_token[length] is a nested dict keyed character by character;
    # the innermost value is the token name.
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        token_name = tok_names[value]
        node = chars_to_token.setdefault(len(string), {})
        for char in string[:-1]:
            node = node.setdefault(char, {})
        node[string[-1]] = token_name

    # Names from ERRORTOKEN onwards are wrapped in angle brackets.
    name_rows = [
        ' "%s",\n' % (name if value < ERRORTOKEN else '<%s>' % name)
        for value, name in enumerate(tok_names)
    ]
    name_rows.append(' "<N_TOKENS>",\n')

    switches = tuple(
        generate_chars_to_token(chars_to_token[length])
        for length in (1, 2, 3)
    )
    source = token_c_template % ((''.join(name_rows),) + switches)

    if update_file(outfile, source):
        print("%s regenerated from %s" % (outfile, infile))
|
|
|
|
|
|
token_inc_template = """\
|
|
.. Auto-generated by Tools/scripts/generate_token.py
|
|
%s
|
|
.. data:: N_TOKENS
|
|
|
|
.. data:: NT_OFFSET
|
|
"""
|
|
|
|
def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate the reST token list *outfile* from the token file *infile*.

    Each token up to and including ERRORTOKEN gets a ``.. data::`` entry;
    tokens with a literal spelling also get a line quoting that spelling.
    Prints a notice only when the file's contents actually changed.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    # Reverse lookup: token value -> literal spelling.
    tok_to_string = {}
    for spelling, value in string_to_tok.items():
        tok_to_string[value] = spelling

    lines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        lines.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            lines += ['', ' Token value for ``"%s"``.' % tok_to_string[value]]
        lines.append('')

    document = token_inc_template % '\n'.join(lines)
    if update_file(outfile, document):
        print("%s regenerated from %s" % (outfile, infile))
|
|
|
|
|
|
# Template for the generated Python token module.  make_py() fills the
# placeholders, in order, with: the "NAME = value" constant lines (%s), the
# total number of tokens (%d), NT_OFFSET (%d) and the entries of the
# EXACT_TOKEN_TYPES dict (%s).
#
# The bodies of the generated helper functions must be indented, otherwise
# the generated module is not valid Python.
token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''
|
|
|
|
def make_py(infile, outfile='Lib/token.py'):
    """Regenerate the Python token module *outfile* from the token file *infile*.

    Writes one ``NAME = value`` constant per token and the
    EXACT_TOKEN_TYPES entries for tokens with a literal spelling.  Prints a
    notice only when the file's contents actually changed.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = [
        '%s = %d' % (name, value)
        for value, name in enumerate(tok_names)
    ]
    # The explanatory comment goes immediately before the ERRORTOKEN line.
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = [
        ' %r: %s,' % (spelling, tok_names[value])
        for spelling, value in sorted(string_to_tok.items())
    ]

    module_text = token_py_template % (
        '\n'.join(constants),
        len(tok_names),
        NT_OFFSET,
        '\n'.join(token_types),
    )
    if update_file(outfile, module_text):
        print("%s regenerated from %s" % (outfile, infile))
|
|
|
|
|
|
def main(op, infile='Grammar/Tokens', *args):
    """Run the module-level generator named ``make_<op>``.

    *infile* and any extra positional arguments are passed straight
    through to that generator.  An unknown *op* raises KeyError.
    """
    generator = globals()['make_' + op]
    generator(infile, *args)
|
|
|
|
|
|
if __name__ == '__main__':
    import sys
    # Command-line arguments map directly onto main()'s parameters: the
    # first selects the make_<op> generator, the optional second is the
    # token file, and any further ones are forwarded to the generator.
    main(*sys.argv[1:])
|