cpython/Lib/token.py
Marta Gómez Macías 6715f91edc
gh-102856: Python tokenizer implementation for PEP 701 (#104323)
This commit replaces the Python implementation of the tokenize module with an implementation
that reuses the real C tokenizer via a private extension module. The tokenize module now implements
a compatibility layer that transforms tokens from the C tokenizer into Python tokenize tokens for backward
compatibility.

As the C tokenizer does not emit some tokens that the Python tokenizer provides (such as comments and non-semantic newlines), a new special mode has been added to the C tokenizer; it is currently used only via
the extension module that exposes it to the Python layer. This new mode forces the C tokenizer to emit these extra tokens and to add the metadata needed to match the old Python implementation.

Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
2023-05-21 01:03:02 +01:00

143 lines
2.4 KiB
Python
Generated

"""Token constants."""
# Auto-generated by Tools/build/generate_token.py
# This section defines the integer code of every token type, then derives
# the code -> name mapping (tok_name) and the export list from them.
#
# __all__ starts with the helper names only; the token constant names are
# appended by the extend() call at the end of this section.
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
# Token type codes, numbered consecutively starting at 0.
ENDMARKER = 0
NAME = 1
NUMBER = 2
STRING = 3
NEWLINE = 4
INDENT = 5
DEDENT = 6
# LPAR (7) through EXCLAMATION (54) are the codes that EXACT_TOKEN_TYPES
# maps from their literal operator/delimiter spelling.
LPAR = 7
RPAR = 8
LSQB = 9
RSQB = 10
COLON = 11
COMMA = 12
SEMI = 13
PLUS = 14
MINUS = 15
STAR = 16
SLASH = 17
VBAR = 18
AMPER = 19
LESS = 20
GREATER = 21
EQUAL = 22
DOT = 23
PERCENT = 24
LBRACE = 25
RBRACE = 26
EQEQUAL = 27
NOTEQUAL = 28
LESSEQUAL = 29
GREATEREQUAL = 30
TILDE = 31
CIRCUMFLEX = 32
LEFTSHIFT = 33
RIGHTSHIFT = 34
DOUBLESTAR = 35
PLUSEQUAL = 36
MINEQUAL = 37
STAREQUAL = 38
SLASHEQUAL = 39
PERCENTEQUAL = 40
AMPEREQUAL = 41
VBAREQUAL = 42
CIRCUMFLEXEQUAL = 43
LEFTSHIFTEQUAL = 44
RIGHTSHIFTEQUAL = 45
DOUBLESTAREQUAL = 46
DOUBLESLASH = 47
DOUBLESLASHEQUAL = 48
AT = 49
ATEQUAL = 50
RARROW = 51
ELLIPSIS = 52
COLONEQUAL = 53
EXCLAMATION = 54
OP = 55
AWAIT = 56
ASYNC = 57
TYPE_IGNORE = 58
TYPE_COMMENT = 59
SOFT_KEYWORD = 60
FSTRING_START = 61
FSTRING_MIDDLE = 62
FSTRING_END = 63
COMMENT = 64
NL = 65
# These aren't used by the C tokenizer but are needed for tokenize.py
ERRORTOKEN = 66
ENCODING = 67
# One past the highest token code above, i.e. the number of codes defined.
N_TOKENS = 68
# Special definitions for cooperation with parser
# ISTERMINAL/ISNONTERMINAL compare their argument against this threshold.
NT_OFFSET = 256
# Reverse mapping (code -> name), built by introspection: it picks up every
# module global defined so far whose value is an int and whose name does not
# start with '_'. That includes N_TOKENS and NT_OFFSET, and would include any
# other int global introduced above this statement, so keep helper variables
# out of this section.
tok_name = {value: name
for name, value in globals().items()
if isinstance(value, int) and not name.startswith('_')}
# Export every constant name collected in tok_name alongside the helpers.
__all__.extend(tok_name.values())
# Literal spelling of each operator/delimiter -> its token type code.
# Entries are kept in ascending string order; do not reorder them, since
# dict iteration order is observable to callers.
EXACT_TOKEN_TYPES = {
    '!': EXCLAMATION,
    '!=': NOTEQUAL,
    '%': PERCENT,
    '%=': PERCENTEQUAL,
    '&': AMPER,
    '&=': AMPEREQUAL,
    '(': LPAR,
    ')': RPAR,
    '*': STAR,
    '**': DOUBLESTAR,
    '**=': DOUBLESTAREQUAL,
    '*=': STAREQUAL,
    '+': PLUS,
    '+=': PLUSEQUAL,
    ',': COMMA,
    '-': MINUS,
    '-=': MINEQUAL,
    '->': RARROW,
    '.': DOT,
    '...': ELLIPSIS,
    '/': SLASH,
    '//': DOUBLESLASH,
    '//=': DOUBLESLASHEQUAL,
    '/=': SLASHEQUAL,
    ':': COLON,
    ':=': COLONEQUAL,
    ';': SEMI,
    '<': LESS,
    '<<': LEFTSHIFT,
    '<<=': LEFTSHIFTEQUAL,
    '<=': LESSEQUAL,
    '=': EQUAL,
    '==': EQEQUAL,
    '>': GREATER,
    '>=': GREATEREQUAL,
    '>>': RIGHTSHIFT,
    '>>=': RIGHTSHIFTEQUAL,
    '@': AT,
    '@=': ATEQUAL,
    '[': LSQB,
    ']': RSQB,
    '^': CIRCUMFLEX,
    '^=': CIRCUMFLEXEQUAL,
    '{': LBRACE,
    '|': VBAR,
    '|=': VBAREQUAL,
    '}': RBRACE,
    '~': TILDE,
}
def ISTERMINAL(x):
    """Return whether the numeric code *x* is strictly below ``NT_OFFSET``."""
    below_offset = x < NT_OFFSET
    return below_offset
def ISNONTERMINAL(x):
    """Return whether the numeric code *x* is at or above ``NT_OFFSET``."""
    at_or_above_offset = x >= NT_OFFSET
    return at_or_above_offset
def ISEOF(x):
    """Return whether the numeric code *x* equals ``ENDMARKER``."""
    is_end_marker = x == ENDMARKER
    return is_end_marker