cpython/Parser/token.c
Marta Gómez Macías 6715f91edc
gh-102856: Python tokenizer implementation for PEP 701 (#104323)
This commit replaces the Python implementation of the tokenize module with an implementation
that reuses the real C tokenizer via a private extension module. The tokenize module now implements
a compatibility layer that transforms tokens from the C tokenizer into Python tokenize tokens for backward
compatibility.

As the C tokenizer does not emit some tokens that the Python tokenizer provides (such as comments and non-semantic newlines), a new special mode has been added to the C tokenizer. This mode is currently used only via
the extension module that exposes the C tokenizer to the Python layer. It forces the C tokenizer to emit these extra tokens and to add the metadata needed to match the old Python implementation.

Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
2023-05-21 01:03:02 +01:00

250 lines
4.6 KiB
C
Generated

/* Auto-generated by Tools/build/generate_token.py */
#include "Python.h"
#include "pycore_token.h"
/*
 * Table of token names.
 *
 * The initializer is purely positional, so the order of the strings below
 * is significant and must not be changed.  The number in the comment at the
 * start of each row is the array index of the first string on that row.
 * NOTE(review): presumably index i names the token whose numeric value is i
 * in pycore_token.h -- confirm against the generator script.
 */
const char * const _PyParser_TokenNames[] = {
    /*  0 */ "ENDMARKER", "NAME", "NUMBER", "STRING",
    /*  4 */ "NEWLINE", "INDENT", "DEDENT",
    /*  7 */ "LPAR", "RPAR", "LSQB", "RSQB",
    /* 11 */ "COLON", "COMMA", "SEMI",
    /* 14 */ "PLUS", "MINUS", "STAR", "SLASH",
    /* 18 */ "VBAR", "AMPER", "LESS", "GREATER",
    /* 22 */ "EQUAL", "DOT", "PERCENT",
    /* 25 */ "LBRACE", "RBRACE",
    /* 27 */ "EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",
    /* 31 */ "TILDE", "CIRCUMFLEX",
    /* 33 */ "LEFTSHIFT", "RIGHTSHIFT", "DOUBLESTAR",
    /* 36 */ "PLUSEQUAL", "MINEQUAL", "STAREQUAL", "SLASHEQUAL",
    /* 40 */ "PERCENTEQUAL", "AMPEREQUAL", "VBAREQUAL", "CIRCUMFLEXEQUAL",
    /* 44 */ "LEFTSHIFTEQUAL", "RIGHTSHIFTEQUAL", "DOUBLESTAREQUAL",
    /* 47 */ "DOUBLESLASH", "DOUBLESLASHEQUAL",
    /* 49 */ "AT", "ATEQUAL",
    /* 51 */ "RARROW", "ELLIPSIS", "COLONEQUAL", "EXCLAMATION",
    /* 55 */ "OP",
    /* 56 */ "AWAIT", "ASYNC",
    /* 58 */ "TYPE_IGNORE", "TYPE_COMMENT", "SOFT_KEYWORD",
    /* 61 */ "FSTRING_START", "FSTRING_MIDDLE", "FSTRING_END",
    /* 64 */ "COMMENT", "NL",
    /* 66 */ "<ERRORTOKEN>", "<ENCODING>", "<N_TOKENS>",
};
/*
 * Map a single character to its token constant.
 *
 * c1: the character to classify.
 *
 * Returns the matching token constant, or OP when c1 is not one of the
 * characters listed in the switch below.
 */
int
_PyToken_OneChar(int c1)
{
    int token = OP;  /* result for any character without its own case */

    switch (c1) {
    case '!': token = EXCLAMATION; break;
    case '%': token = PERCENT;     break;
    case '&': token = AMPER;       break;
    case '(': token = LPAR;        break;
    case ')': token = RPAR;        break;
    case '*': token = STAR;        break;
    case '+': token = PLUS;        break;
    case ',': token = COMMA;       break;
    case '-': token = MINUS;       break;
    case '.': token = DOT;         break;
    case '/': token = SLASH;       break;
    case ':': token = COLON;       break;
    case ';': token = SEMI;        break;
    case '<': token = LESS;        break;
    case '=': token = EQUAL;       break;
    case '>': token = GREATER;     break;
    case '@': token = AT;          break;
    case '[': token = LSQB;        break;
    case ']': token = RSQB;        break;
    case '^': token = CIRCUMFLEX;  break;
    case '{': token = LBRACE;      break;
    case '|': token = VBAR;        break;
    case '}': token = RBRACE;      break;
    case '~': token = TILDE;       break;
    default:
        break;
    }
    return token;
}
/*
 * Map a two-character sequence to its token constant.
 *
 * c1: first character of the sequence.
 * c2: second character of the sequence.
 *
 * Returns the matching token constant, or OP when the pair (c1, c2) is not
 * one of the recognized two-character sequences.
 */
int
_PyToken_TwoChars(int c1, int c2)
{
    /* Sequences of the form "X=".  Every recognized pair ending in '='
       is listed here, so an unknown first character means no match. */
    if (c2 == '=') {
        switch (c1) {
        case '!': return NOTEQUAL;
        case '%': return PERCENTEQUAL;
        case '&': return AMPEREQUAL;
        case '*': return STAREQUAL;
        case '+': return PLUSEQUAL;
        case '-': return MINEQUAL;
        case '/': return SLASHEQUAL;
        case ':': return COLONEQUAL;
        case '<': return LESSEQUAL;
        case '=': return EQEQUAL;
        case '>': return GREATEREQUAL;
        case '@': return ATEQUAL;
        case '^': return CIRCUMFLEXEQUAL;
        case '|': return VBAREQUAL;
        default:  return OP;
        }
    }

    /* Doubled characters ("==" was already handled above). */
    if (c1 == c2) {
        switch (c1) {
        case '*': return DOUBLESTAR;
        case '/': return DOUBLESLASH;
        case '<': return LEFTSHIFT;
        case '>': return RIGHTSHIFT;
        default:  return OP;
        }
    }

    /* The two remaining pairs made of different characters. */
    if (c1 == '-' && c2 == '>') {
        return RARROW;
    }
    if (c1 == '<' && c2 == '>') {
        /* "<>" yields the same token as "!=". */
        return NOTEQUAL;
    }

    return OP;
}
/*
 * Map a three-character sequence to its token constant.
 *
 * c1, c2, c3: the characters of the sequence, in order.
 *
 * Returns the matching token constant, or OP when (c1, c2, c3) is not one
 * of the recognized three-character sequences.
 */
int
_PyToken_ThreeChars(int c1, int c2, int c3)
{
    /* Sequences of the form "XX=": a doubled character followed by '='. */
    if (c1 == c2 && c3 == '=') {
        switch (c1) {
        case '*': return DOUBLESTAREQUAL;
        case '/': return DOUBLESLASHEQUAL;
        case '<': return LEFTSHIFTEQUAL;
        case '>': return RIGHTSHIFTEQUAL;
        default:
            break;
        }
    }

    /* The only other recognized sequence is "...". */
    if (c1 == '.' && c2 == '.' && c3 == '.') {
        return ELLIPSIS;
    }

    return OP;
}