gh-102856: Initial implementation of PEP 701 (#102855)

Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> Co-authored-by: Batuhan Taskaya <isidentical@gmail.com> Co-authored-by: Marta Gómez Macías <mgmacias@google.com> Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
2024-11-23 09:54:58 +08:00 · 2023-04-19 17:18:16 +01:00 · 2023-04-19 17:18:16 +01:00 · 1ef61cf71a
commit 1ef61cf71a
parent a6b07b5a34
27 changed files with 8859 additions and 6573 deletions
--- a/Doc/library/token-list.inc
+++ b/Doc/library/token-list.inc
@ -201,6 +201,10 @@

   Token value for ``":="``.

+.. data:: EXCLAMATION
+
+   Token value for ``"!"``.
+
 .. data:: OP

 .. data:: AWAIT
@ -213,6 +217,12 @@

 .. data:: SOFT_KEYWORD

+.. data:: FSTRING_START
+
+.. data:: FSTRING_MIDDLE
+
+.. data:: FSTRING_END
+
 .. data:: ERRORTOKEN

 .. data:: N_TOKENS
--- a/Grammar/Tokens
+++ b/Grammar/Tokens
@ -53,6 +53,7 @@ ATEQUAL                 '@='
 RARROW                  '->'
 ELLIPSIS                '...'
 COLONEQUAL              ':='
+EXCLAMATION             '!'

 OP
 AWAIT
@ -60,6 +61,9 @@ ASYNC
 TYPE_IGNORE
 TYPE_COMMENT
 SOFT_KEYWORD
+FSTRING_START
+FSTRING_MIDDLE
+FSTRING_END
 ERRORTOKEN

 # These aren't used by the C tokenizer but are needed for tokenize.py
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@ -807,7 +807,7 @@ atom[expr_ty]:
    | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) }
    | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) }
    | 'None' { _PyAST_Constant(Py_None, NULL, EXTRA) }
-    | &STRING strings
+    | &(STRING|FSTRING_START) strings
    | NUMBER
    | &'(' (tuple | group | genexp)
    | &'[' (list | listcomp)
@ -877,7 +877,26 @@ lambda_param[arg_ty]: a=NAME { _PyAST_arg(a->v.Name.id, NULL, NULL, EXTRA) }
 # LITERALS
 # ========

-strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }
+fstring_middle[expr_ty]:
+    | fstring_replacement_field
+    | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
+fstring_replacement_field[expr_ty]:
+    | '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] '}' {
+        _PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
+    }
+    | invalid_replacement_field
+fstring_conversion[expr_ty]:
+    | conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
+fstring_full_format_spec[expr_ty]:
+    | ':' spec=fstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
+fstring_format_spec[expr_ty]:
+    | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
+    | fstring_replacement_field
+fstring[expr_ty]:
+    | a=FSTRING_START b=fstring_middle* c=FSTRING_END { _PyPegen_joined_str(p, a, (asdl_expr_seq*)b, c) }
+
+string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_string(p, s) }
+strings[expr_ty] (memo): a[asdl_expr_seq*]=(fstring|string)+ { _PyPegen_concatenate_strings(p, a, EXTRA) }

 list[expr_ty]:
    | '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) }
@ -1118,6 +1137,8 @@ invalid_expression:
        _PyPegen_check_legacy_stmt(p, a) ? NULL : p->tokens[p->mark-1]->level == 0 ? NULL :
        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
   | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
+   | a='lambda' [lambda_params] b=':' &(FSTRING_MIDDLE | fstring_replacement_field)  {
+        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "f-string: lambda expressions are not allowed without parentheses") }

 invalid_named_expression(memo):
    | a=expression ':=' expression {
@ -1335,3 +1356,24 @@ invalid_kvpair:
    | expression a=':' &('}'|',') {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
 invalid_starred_expression:
    | a='*' expression '=' b=expression { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "cannot assign to iterable argument unpacking") }
+invalid_replacement_field:
+    | '{' a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '='") }
+    | '{' a='!' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '!'") }
+    | '{' a=':' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before ':'") }
+    | '{' a='}' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '}'") }
+    | '{' !(yield_expr | star_expressions) { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting a valid expression after '{'")}
+    | '{' (yield_expr | star_expressions) !('=' | '!' | ':' | '}') {
+        PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '=', or '!', or ':', or '}'") }
+    | '{' (yield_expr | star_expressions) '=' !('!' | ':' | '}') {
+        PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '!', or ':', or '}'") }
+    | '{' (yield_expr | star_expressions) '='? invalid_conversion_character
+    | '{' (yield_expr | star_expressions) '='? ['!' NAME] !(':' | '}') {
+        PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting ':' or '}'") }
+    | '{' (yield_expr | star_expressions) '='? ['!' NAME] ':' fstring_format_spec* !'}' {
+        PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '}', or format specs") }
+    | '{' (yield_expr | star_expressions) '='? ['!' NAME] !'}' {
+        PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '}'") }
+
+invalid_conversion_character:
+    | '!' &(':' | '}') { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: missing conversion character") }
+    | '!' !NAME { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: invalid conversion character") }
--- a/Include/internal/pycore_token.h
+++ b/Include/internal/pycore_token.h
@ -67,14 +67,18 @@ extern "C" {
 #define RARROW          51
 #define ELLIPSIS        52
 #define COLONEQUAL      53
-#define OP              54
-#define AWAIT           55
-#define ASYNC           56
-#define TYPE_IGNORE     57
-#define TYPE_COMMENT    58
-#define SOFT_KEYWORD    59
-#define ERRORTOKEN      60
-#define N_TOKENS        64
+#define EXCLAMATION     54
+#define OP              55
+#define AWAIT           56
+#define ASYNC           57
+#define TYPE_IGNORE     58
+#define TYPE_COMMENT    59
+#define SOFT_KEYWORD    60
+#define FSTRING_START   61
+#define FSTRING_MIDDLE  62
+#define FSTRING_END     63
+#define ERRORTOKEN      64
+#define N_TOKENS        68
 #define NT_OFFSET       256

 /* Special definitions for cooperation with parser */
@ -86,6 +90,8 @@ extern "C" {
                                 (x) == NEWLINE   || \
                                 (x) == INDENT    || \
                                 (x) == DEDENT)
+#define ISSTRINGLIT(x)          ((x) == STRING           || \
+                                 (x) == FSTRING_MIDDLE)


 // Symbols exported for test_peg_generator
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@ -774,11 +774,6 @@ class AST_Tests(unittest.TestCase):
            ast.parse('with (CtxManager() as example): ...', feature_version=(3, 8))
        ast.parse('with CtxManager() as example: ...', feature_version=(3, 8))

-    def test_debug_f_string_feature_version(self):
-        ast.parse('f"{x=}"', feature_version=(3, 8))
-        with self.assertRaises(SyntaxError):
-            ast.parse('f"{x=}"', feature_version=(3, 7))
-
    def test_assignment_expression_feature_version(self):
        ast.parse('(x := 0)', feature_version=(3, 8))
        with self.assertRaises(SyntaxError):
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@ -636,9 +636,9 @@ class CmdLineTest(unittest.TestCase):
            self.assertEqual(
                stderr.splitlines()[-3:],
                [
-                    b'    foo"""',
+                    b'    foo = f"""{}',
                    b'               ^',
-                    b'SyntaxError: f-string: empty expression not allowed',
+                    b'SyntaxError: f-string: valid expression required before \'}\'',
                ],
            )

--- a/Lib/test/test_eof.py
+++ b/Lib/test/test_eof.py
@ -4,6 +4,7 @@ import sys
 from test import support
 from test.support import os_helper
 from test.support import script_helper
+from test.support import warnings_helper
 import unittest

 class EOFTestCase(unittest.TestCase):
@ -36,10 +37,11 @@ class EOFTestCase(unittest.TestCase):
            rc, out, err = script_helper.assert_python_failure(file_name)
        self.assertIn(b'unterminated triple-quoted string literal (detected at line 3)', err)

+    @warnings_helper.ignore_warnings(category=SyntaxWarning)
    def test_eof_with_line_continuation(self):
        expect = "unexpected EOF while parsing (<string>, line 1)"
        try:
-            compile('"\\xhh" \\',  '<string>', 'exec', dont_inherit=True)
+            compile('"\\Xhh" \\', '<string>', 'exec')
        except SyntaxError as msg:
            self.assertEqual(str(msg), expect)
        else:
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@ -155,6 +155,7 @@ class ExceptionTests(unittest.TestCase):

        ckmsg(s, "'continue' not properly in loop")
        ckmsg("continue\n", "'continue' not properly in loop")
+        ckmsg("f'{6 0}'", "invalid syntax. Perhaps you forgot a comma?")

    def testSyntaxErrorMissingParens(self):
        def ckmsg(src, msg, exception=SyntaxError):
@ -227,7 +228,7 @@ class ExceptionTests(unittest.TestCase):
        check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
        check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
              2, 19, encoding='cp1251')
-        check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
+        check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 10)
        check('x = "a', 1, 5)
        check('lambda x: x = 2', 1, 1)
        check('f{a + b + c}', 1, 2)
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@ -329,13 +329,13 @@ non-important content
        self.assertEqual(t.body[1].lineno, 3)
        self.assertEqual(t.body[1].value.lineno, 3)
        self.assertEqual(t.body[1].value.values[0].lineno, 3)
-        self.assertEqual(t.body[1].value.values[1].lineno, 3)
-        self.assertEqual(t.body[1].value.values[2].lineno, 3)
+        self.assertEqual(t.body[1].value.values[1].lineno, 4)
+        self.assertEqual(t.body[1].value.values[2].lineno, 6)
        self.assertEqual(t.body[1].col_offset, 0)
        self.assertEqual(t.body[1].value.col_offset, 0)
-        self.assertEqual(t.body[1].value.values[0].col_offset, 0)
-        self.assertEqual(t.body[1].value.values[1].col_offset, 0)
-        self.assertEqual(t.body[1].value.values[2].col_offset, 0)
+        self.assertEqual(t.body[1].value.values[0].col_offset, 4)
+        self.assertEqual(t.body[1].value.values[1].col_offset, 2)
+        self.assertEqual(t.body[1].value.values[2].col_offset, 11)
        # NOTE: the following lineno information and col_offset is correct for
        # expressions within FormattedValues.
        binop = t.body[1].value.values[1].value
@ -366,13 +366,13 @@ a = f'''
        self.assertEqual(t.body[0].lineno, 2)
        self.assertEqual(t.body[0].value.lineno, 2)
        self.assertEqual(t.body[0].value.values[0].lineno, 2)
-        self.assertEqual(t.body[0].value.values[1].lineno, 2)
-        self.assertEqual(t.body[0].value.values[2].lineno, 2)
+        self.assertEqual(t.body[0].value.values[1].lineno, 3)
+        self.assertEqual(t.body[0].value.values[2].lineno, 3)
        self.assertEqual(t.body[0].col_offset, 0)
        self.assertEqual(t.body[0].value.col_offset, 4)
-        self.assertEqual(t.body[0].value.values[0].col_offset, 4)
-        self.assertEqual(t.body[0].value.values[1].col_offset, 4)
-        self.assertEqual(t.body[0].value.values[2].col_offset, 4)
+        self.assertEqual(t.body[0].value.values[0].col_offset, 8)
+        self.assertEqual(t.body[0].value.values[1].col_offset, 10)
+        self.assertEqual(t.body[0].value.values[2].col_offset, 17)
        # Check {blech}
        self.assertEqual(t.body[0].value.values[1].value.lineno, 3)
        self.assertEqual(t.body[0].value.values[1].value.end_lineno, 3)
@ -387,6 +387,20 @@ x = (
        t = ast.parse(expr)
        self.assertEqual(type(t), ast.Module)
        self.assertEqual(len(t.body), 1)
+        # check the joinedstr location
+        joinedstr = t.body[0].value
+        self.assertEqual(type(joinedstr), ast.JoinedStr)
+        self.assertEqual(joinedstr.lineno, 3)
+        self.assertEqual(joinedstr.end_lineno, 3)
+        self.assertEqual(joinedstr.col_offset, 4)
+        self.assertEqual(joinedstr.end_col_offset, 17)
+        # check the formatted value location
+        fv = t.body[0].value.values[1]
+        self.assertEqual(type(fv), ast.FormattedValue)
+        self.assertEqual(fv.lineno, 3)
+        self.assertEqual(fv.end_lineno, 3)
+        self.assertEqual(fv.col_offset, 7)
+        self.assertEqual(fv.end_col_offset, 16)
        # check the test(t) location
        call = t.body[0].value.values[1].value
        self.assertEqual(type(call), ast.Call)
@ -397,6 +411,50 @@ x = (

        expr = """
 x = (
+    u'wat',
+    u"wat",
+    b'wat',
+    b"wat",
+    f'wat',
+    f"wat",
+)
+
+y = (
+    u'''wat''',
+    u\"\"\"wat\"\"\",
+    b'''wat''',
+    b\"\"\"wat\"\"\",
+    f'''wat''',
+    f\"\"\"wat\"\"\",
+)
+        """
+        t = ast.parse(expr)
+        self.assertEqual(type(t), ast.Module)
+        self.assertEqual(len(t.body), 2)
+        x, y = t.body
+
+        # Check the single quoted string offsets first.
+        offsets = [
+            (elt.col_offset, elt.end_col_offset)
+            for elt in x.value.elts
+        ]
+        self.assertTrue(all(
+            offset == (4, 10)
+            for offset in offsets
+        ))
+
+        # Check the triple quoted string offsets.
+        offsets = [
+            (elt.col_offset, elt.end_col_offset)
+            for elt in y.value.elts
+        ]
+        self.assertTrue(all(
+            offset == (4, 14)
+            for offset in offsets
+        ))
+
+        expr = """
+x = (
        'PERL_MM_OPT', (
            f'wat'
            f'some_string={f(x)} '
@ -415,9 +473,9 @@ x = (
        # check the first wat
        self.assertEqual(type(wat1), ast.Constant)
        self.assertEqual(wat1.lineno, 4)
-        self.assertEqual(wat1.end_lineno, 6)
-        self.assertEqual(wat1.col_offset, 12)
-        self.assertEqual(wat1.end_col_offset, 18)
+        self.assertEqual(wat1.end_lineno, 5)
+        self.assertEqual(wat1.col_offset, 14)
+        self.assertEqual(wat1.end_col_offset, 26)
        # check the call
        call = middle.value
        self.assertEqual(type(call), ast.Call)
@ -427,10 +485,14 @@ x = (
        self.assertEqual(call.end_col_offset, 31)
        # check the second wat
        self.assertEqual(type(wat2), ast.Constant)
-        self.assertEqual(wat2.lineno, 4)
+        self.assertEqual(wat2.lineno, 5)
        self.assertEqual(wat2.end_lineno, 6)
-        self.assertEqual(wat2.col_offset, 12)
-        self.assertEqual(wat2.end_col_offset, 18)
+        self.assertEqual(wat2.col_offset, 32)
+        # wat ends at the offset 17, but the whole f-string
+        # ends at the offset 18 (since the quote is part of the
+        # f-string but not the wat string)
+        self.assertEqual(wat2.end_col_offset, 17)
+        self.assertEqual(fstring.end_col_offset, 18)

    def test_docstring(self):
        def f():
@ -467,7 +529,7 @@ x = (
        self.assertEqual(f' ', ' ')

    def test_unterminated_string(self):
-        self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
+        self.assertAllRaise(SyntaxError, 'unterminated string',
                            [r"""f'{"x'""",
                             r"""f'{"x}'""",
                             r"""f'{("x'""",
@ -475,28 +537,33 @@ x = (
                             ])

    def test_mismatched_parens(self):
-        self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\}' "
+        self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' "
                            r"does not match opening parenthesis '\('",
                            ["f'{((}'",
                             ])
-        self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\)' "
+        self.assertAllRaise(SyntaxError, r"closing parenthesis '\)' "
                            r"does not match opening parenthesis '\['",
                            ["f'{a[4)}'",
                            ])
-        self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\]' "
+        self.assertAllRaise(SyntaxError, r"closing parenthesis '\]' "
                            r"does not match opening parenthesis '\('",
                            ["f'{a(4]}'",
                            ])
-        self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\}' "
+        self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' "
                            r"does not match opening parenthesis '\['",
                            ["f'{a[4}'",
                            ])
-        self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\}' "
+        self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' "
                            r"does not match opening parenthesis '\('",
                            ["f'{a(4}'",
                            ])
        self.assertRaises(SyntaxError, eval, "f'{" + "("*500 + "}'")

+    def test_fstring_nested_too_deeply(self):
+        self.assertAllRaise(SyntaxError,
+                            "f-string: expressions nested too deeply",
+                            ['f"{1+2:{1+2:{1+1:{1}}}}"'])
+
    def test_double_braces(self):
        self.assertEqual(f'{{', '{')
        self.assertEqual(f'a{{', 'a{')
@ -559,8 +626,14 @@ x = (
        self.assertEqual(f'' '' f'', '')
        self.assertEqual(f'' '' f'' '', '')

-        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
-                            ["f'{3' f'}'",  # can't concat to get a valid f-string
+        # This is not really [f'{'] + [f'}']: the inside of the braces is
+        # treated as a brand new context, so it is actually f'{ followed
+        # by the expression ' f' (a valid string literal) and then }',
+        # which together constitute a valid f-string.
+        self.assertEqual(f'{' f'}', ' f')
+
+        self.assertAllRaise(SyntaxError, "expecting '}'",
+                            ['''f'{3' f"}"''',  # can't concat to get a valid f-string
                             ])

    def test_comments(self):
@ -618,25 +691,19 @@ x = (
        self.assertEqual(f'{-10:-{"#"}1{0}x}', '      -0xa')
        self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', '      -0xa')
        self.assertEqual(f'{10:#{3 != {4:5} and width}x}', '       0xa')
+        self.assertEqual(f'result: {value:{width:{0}}.{precision:1}}', 'result:      12.35')

-        self.assertAllRaise(SyntaxError,
-                            """f-string: invalid conversion character 'r{"': """
-                            """expected 's', 'r', or 'a'""",
+        self.assertAllRaise(SyntaxError, "f-string: expecting ':' or '}'",
                            ["""f'{"s"!r{":10"}}'""",
-
                             # This looks like a nested format spec.
                             ])

-        self.assertAllRaise(SyntaxError, "f-string: invalid syntax",
+        self.assertAllRaise(SyntaxError,
+                            "f-string: expecting a valid expression after '{'",
                            [# Invalid syntax inside a nested spec.
                             "f'{4:{/5}}'",
                             ])

-        self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
-                            [# Can't nest format specifiers.
-                             "f'result: {value:{width:{0}}.{precision:1}}'",
-                             ])
-
        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
                            [# No expansion inside conversion or for
                             #  the : or ! itself.
@ -655,7 +722,8 @@ x = (
        self.assertEqual(f'{x} {x}', '1 2')

    def test_missing_expression(self):
-        self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
+        self.assertAllRaise(SyntaxError,
+                            "f-string: valid expression required before '}'",
                            ["f'{}'",
                             "f'{ }'"
                             "f' {} '",
@ -667,8 +735,8 @@ x = (
                             "f'''{\t\f\r\n}'''",
                             ])

-        # Different error messages are raised when a specifier ('!', ':' or '=') is used after an empty expression
-        self.assertAllRaise(SyntaxError, "f-string: expression required before '!'",
+        self.assertAllRaise(SyntaxError,
+                            "f-string: valid expression required before '!'",
                            ["f'{!r}'",
                             "f'{ !r}'",
                             "f'{!}'",
@ -689,7 +757,8 @@ x = (
                             "f'{ !xr:a}'",
                             ])

-        self.assertAllRaise(SyntaxError, "f-string: expression required before ':'",
+        self.assertAllRaise(SyntaxError,
+                            "f-string: valid expression required before ':'",
                            ["f'{:}'",
                             "f'{ :!}'",
                             "f'{:2}'",
@ -697,7 +766,8 @@ x = (
                             "f'{:'",
                             ])

-        self.assertAllRaise(SyntaxError, "f-string: expression required before '='",
+        self.assertAllRaise(SyntaxError,
+                            "f-string: valid expression required before '='",
                            ["f'{=}'",
                             "f'{ =}'",
                             "f'{ =:}'",
@ -715,24 +785,18 @@ x = (
    def test_parens_in_expressions(self):
        self.assertEqual(f'{3,}', '(3,)')

-        # Add these because when an expression is evaluated, parens
-        #  are added around it. But we shouldn't go from an invalid
-        #  expression to a valid one. The added parens are just
-        #  supposed to allow whitespace (including newlines).
-        self.assertAllRaise(SyntaxError, 'f-string: invalid syntax',
+        self.assertAllRaise(SyntaxError,
+                            "f-string: expecting a valid expression after '{'",
                            ["f'{,}'",
-                             "f'{,}'",  # this is (,), which is an error
                             ])

        self.assertAllRaise(SyntaxError, r"f-string: unmatched '\)'",
                            ["f'{3)+(4}'",
                             ])

-        self.assertAllRaise(SyntaxError, 'unterminated string literal',
-                            ["f'{\n}'",
-                             ])
    def test_newlines_before_syntax_error(self):
-        self.assertAllRaise(SyntaxError, "invalid syntax",
+        self.assertAllRaise(SyntaxError,
+                            "f-string: expecting a valid expression after '{'",
                ["f'{.}'", "\nf'{.}'", "\n\nf'{.}'"])

    def test_backslashes_in_string_part(self):
@ -776,7 +840,7 @@ x = (
        self.assertEqual(f'2\x203', '2 3')
        self.assertEqual(f'\x203', ' 3')

-        with self.assertWarns(SyntaxWarning):  # invalid escape sequence
+        with self.assertWarns(DeprecationWarning):  # invalid escape sequence
            value = eval(r"f'\{6*7}'")
        self.assertEqual(value, '\\42')
        self.assertEqual(f'\\{6*7}', '\\42')
@ -809,18 +873,40 @@ x = (
                             r"'\N{GREEK CAPITAL LETTER DELTA'",
                             ])

-    def test_no_backslashes_in_expression_part(self):
-        self.assertAllRaise(SyntaxError, 'f-string expression part cannot include a backslash',
-                            [r"f'{\'a\'}'",
-                             r"f'{\t3}'",
-                             r"f'{\}'",
-                             r"rf'{\'a\'}'",
-                             r"rf'{\t3}'",
-                             r"rf'{\}'",
-                             r"""rf'{"\N{LEFT CURLY BRACKET}"}'""",
-                             r"f'{\n}'",
+    def test_backslashes_in_expression_part(self):
+        self.assertEqual(f"{(
+                        1 +
+                        2
+        )}", "3")
+
+        self.assertEqual("\N{LEFT CURLY BRACKET}", '{')
+        self.assertEqual(f'{"\N{LEFT CURLY BRACKET}"}', '{')
+        self.assertEqual(rf'{"\N{LEFT CURLY BRACKET}"}', '{')
+
+        self.assertAllRaise(SyntaxError,
+                            "f-string: valid expression required before '}'",
+                            ["f'{\n}'",
                             ])

+    def test_invalid_backslashes_inside_fstring_context(self):
+        # All of these variations are invalid python syntax,
+        # so they are also invalid in f-strings as well.
+        cases = [
+            formatting.format(expr=expr)
+            for formatting in [
+                "{expr}",
+                "f'{{{expr}}}'",
+                "rf'{{{expr}}}'",
+            ]
+            for expr in [
+                r"\'a\'",
+                r"\t3",
+                r"\\"[0],
+            ]
+        ]
+        self.assertAllRaise(SyntaxError, 'unexpected character after line continuation',
+                            cases)
+
    def test_no_escapes_for_braces(self):
        """
        Only literal curly braces begin an expression.
@ -844,10 +930,68 @@ x = (

        # lambda doesn't work without parens, because the colon
        # makes the parser think it's a format_spec
-        self.assertAllRaise(SyntaxError, 'f-string: invalid syntax',
+        # raise a dedicated SyntaxError if we can match a format_spec
+        self.assertAllRaise(SyntaxError,
+                            "f-string: lambda expressions are not allowed "
+                            "without parentheses",
                            ["f'{lambda x:x}'",
+                             "f'{lambda :x}'",
+                             "f'{lambda *arg, :x}'",
+                             "f'{1, lambda:x}'",
                             ])

+        # but don't raise the missing-parentheses error in general cases
+        self.assertAllRaise(SyntaxError,
+                            "f-string: expecting a valid expression after '{'",
+                            ["f'{lambda x:}'",
+                             "f'{lambda :}'",
+                             "f'{+ lambda:None}'",
+                             ])
+
+    def test_valid_prefixes(self):
+        self.assertEqual(F'{1}', "1")
+        self.assertEqual(FR'{2}', "2")
+        self.assertEqual(fR'{3}', "3")
+
+    def test_roundtrip_raw_quotes(self):
+        self.assertEqual(fr"\'", "\\'")
+        self.assertEqual(fr'\"', '\\"')
+        self.assertEqual(fr'\"\'', '\\"\\\'')
+        self.assertEqual(fr'\'\"', '\\\'\\"')
+        self.assertEqual(fr'\"\'\"', '\\"\\\'\\"')
+        self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'')
+        self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'')
+
+    def test_fstring_backslash_before_double_bracket(self):
+        self.assertEqual(f'\{{\}}', '\\{\\}')
+        self.assertEqual(f'\{{', '\\{')
+        self.assertEqual(f'\{{{1+1}', '\\{2')
+        self.assertEqual(f'\}}{1+1}', '\\}2')
+        self.assertEqual(f'{1+1}\}}', '2\\}')
+        self.assertEqual(fr'\{{\}}', '\\{\\}')
+        self.assertEqual(fr'\{{', '\\{')
+        self.assertEqual(fr'\{{{1+1}', '\\{2')
+        self.assertEqual(fr'\}}{1+1}', '\\}2')
+        self.assertEqual(fr'{1+1}\}}', '2\\}')
+
+    def test_fstring_backslash_prefix_raw(self):
+        self.assertEqual(f'\\', '\\')
+        self.assertEqual(f'\\\\', '\\\\')
+        self.assertEqual(fr'\\', r'\\')
+        self.assertEqual(fr'\\\\', r'\\\\')
+        self.assertEqual(rf'\\', r'\\')
+        self.assertEqual(rf'\\\\', r'\\\\')
+        self.assertEqual(Rf'\\', R'\\')
+        self.assertEqual(Rf'\\\\', R'\\\\')
+        self.assertEqual(fR'\\', R'\\')
+        self.assertEqual(fR'\\\\', R'\\\\')
+        self.assertEqual(FR'\\', R'\\')
+        self.assertEqual(FR'\\\\', R'\\\\')
+
+    def test_fstring_format_spec_greedy_matching(self):
+        self.assertEqual(f"{1:}}}", "1}")
+        self.assertEqual(f"{1:>3{5}}}}", "                                  1}")
+
    def test_yield(self):
        # Not terribly useful, but make sure the yield turns
        #  a function into a generator
@ -1037,6 +1181,11 @@ x = (
        self.assertEqual(f'{"a"!r}', "'a'")
        self.assertEqual(f'{"a"!a}', "'a'")

+        # Conversions can have trailing whitespace after them since it
+        # carries no significance.
+        self.assertEqual(f"{3!s  }", "3")
+        self.assertEqual(f'{3.14!s  :10.10}', '3.14      ')
+
        # Not a conversion.
        self.assertEqual(f'{"a!r"}', "a!r")

@ -1049,16 +1198,27 @@ x = (
                             "f'{3!g'",
                             ])

-        self.assertAllRaise(SyntaxError, 'f-string: missed conversion character',
+        self.assertAllRaise(SyntaxError, 'f-string: missing conversion character',
                            ["f'{3!}'",
                             "f'{3!:'",
                             "f'{3!:}'",
                             ])

-        for conv in 'g', 'A', '3', 'G', '!', ' s', 's ', ' s ', 'ä', 'ɐ', 'ª':
+        for conv_identifier in 'g', 'A', 'G', 'ä', 'ɐ':
            self.assertAllRaise(SyntaxError,
                                "f-string: invalid conversion character %r: "
-                                "expected 's', 'r', or 'a'" % conv,
+                                "expected 's', 'r', or 'a'" % conv_identifier,
+                                ["f'{3!" + conv_identifier + "}'"])
+
+        for conv_non_identifier in '3', '!':
+            self.assertAllRaise(SyntaxError,
+                                "f-string: invalid conversion character",
+                                ["f'{3!" + conv_non_identifier + "}'"])
+
+        for conv in ' s', ' s ':
+            self.assertAllRaise(SyntaxError,
+                                "f-string: conversion type must come right after the"
+                                " exclamanation mark",
                                ["f'{3!" + conv + "}'"])

        self.assertAllRaise(SyntaxError,
@ -1097,8 +1257,7 @@ x = (
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
-                            ["f'{3:{{>10}'",
-                             "f'{3'",
+                            ["f'{3'",
                             "f'{3!'",
                             "f'{3:'",
                             "f'{3!s'",
@ -1111,11 +1270,14 @@ x = (
                             "f'{{{'",
                             "f'{{}}{'",
                             "f'{'",
-                             "f'x{<'",  # See bpo-46762.
-                             "f'x{>'",
                             "f'{i='",  # See gh-93418.
                             ])

+        self.assertAllRaise(SyntaxError,
+                            "f-string: expecting a valid expression after '{'",
+                            ["f'{3:{{>10}'",
+                             ])
+
        # But these are just normal strings.
        self.assertEqual(f'{"{"}', '{')
        self.assertEqual(f'{"}"}', '}')
@ -1314,6 +1476,7 @@ x = (
        self.assertEqual(f'X{x  =}Y', 'Xx  ='+repr(x)+'Y')
        self.assertEqual(f'X{x=  }Y', 'Xx=  '+repr(x)+'Y')
        self.assertEqual(f'X{x  =  }Y', 'Xx  =  '+repr(x)+'Y')
+        self.assertEqual(f"sadsd {1 + 1 =  :{1 + 1:1d}f}", "sadsd 1 + 1 =  2.000000")

        # These next lines contains tabs.  Backslash escapes don't
        # work in f-strings.
@ -1335,7 +1498,8 @@ x = (
        self.assertEqual(x, 10)

    def test_invalid_syntax_error_message(self):
-        with self.assertRaisesRegex(SyntaxError, "f-string: invalid syntax"):
+        with self.assertRaisesRegex(SyntaxError,
+                                    "f-string: expecting '=', or '!', or ':', or '}'"):
            compile("f'{a $ b}'", "?", "exec")

    def test_with_two_commas_in_format_specifier(self):
@ -1359,12 +1523,11 @@ x = (
            f'{1:_,}'

    def test_syntax_error_for_starred_expressions(self):
-        error_msg = re.escape("cannot use starred expression here")
-        with self.assertRaisesRegex(SyntaxError, error_msg):
+        with self.assertRaisesRegex(SyntaxError, "can't use starred expression here"):
            compile("f'{*a}'", "?", "exec")

-        error_msg = re.escape("cannot use double starred expression here")
-        with self.assertRaisesRegex(SyntaxError, error_msg):
+        with self.assertRaisesRegex(SyntaxError,
+                                    "f-string: expecting a valid expression after '{'"):
            compile("f'{**a}'", "?", "exec")

 if __name__ == '__main__':
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@ -1625,6 +1625,10 @@ class TestRoundtrip(TestCase):
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
+
+        # TODO: Remove this once we can unparse PEP 701 syntax
+        testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
+
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

@ -1937,25 +1941,39 @@ c"""', """\
    """)

        self.check_tokenize('f"abc"', """\
-    STRING     'f"abc"'      (1, 0) (1, 6)
+    FSTRING_START 'f"'          (1, 0) (1, 2)
+    FSTRING_MIDDLE 'abc'         (1, 2) (1, 5)
+    FSTRING_END '"'           (1, 5) (1, 6)
    """)

        self.check_tokenize('fR"a{b}c"', """\
-    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
+    FSTRING_START 'fR"'         (1, 0) (1, 3)
+    FSTRING_MIDDLE 'a'           (1, 3) (1, 4)
+    LBRACE     '{'           (1, 4) (1, 5)
+    NAME       'b'           (1, 5) (1, 6)
+    RBRACE     '}'           (1, 6) (1, 7)
+    FSTRING_MIDDLE 'c'           (1, 7) (1, 8)
+    FSTRING_END '"'           (1, 8) (1, 9)
    """)

        self.check_tokenize('f"""abc"""', """\
-    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
+    FSTRING_START 'f\"""'        (1, 0) (1, 4)
+    FSTRING_MIDDLE 'abc'         (1, 4) (1, 7)
+    FSTRING_END '\"""'         (1, 7) (1, 10)
    """)

        self.check_tokenize(r'f"abc\
 def"', """\
-    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
+    FSTRING_START \'f"\'          (1, 0) (1, 2)
+    FSTRING_MIDDLE 'abc\\\\\\ndef'  (1, 2) (2, 3)
+    FSTRING_END '"'           (2, 3) (2, 4)
    """)

        self.check_tokenize(r'Rf"abc\
 def"', """\
-    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
+    FSTRING_START 'Rf"'         (1, 0) (1, 3)
+    FSTRING_MIDDLE 'abc\\\\\\ndef'  (1, 3) (2, 3)
+    FSTRING_END '"'           (2, 3) (2, 4)
    """)

    def test_function(self):
--- a/Lib/test/test_type_comments.py
+++ b/Lib/test/test_type_comments.py
@ -272,7 +272,7 @@ class TypeCommentTests(unittest.TestCase):
            pass

    def test_fstring(self):
-        for tree in self.parse_all(fstring, minver=6):
+        for tree in self.parse_all(fstring):
            pass

    def test_underscorednumber(self):
--- a/Lib/token.py
+++ b/Lib/token.py
@ -57,18 +57,22 @@ ATEQUAL = 50
 RARROW = 51
 ELLIPSIS = 52
 COLONEQUAL = 53
-OP = 54
-AWAIT = 55
-ASYNC = 56
-TYPE_IGNORE = 57
-TYPE_COMMENT = 58
-SOFT_KEYWORD = 59
+EXCLAMATION = 54
+OP = 55
+AWAIT = 56
+ASYNC = 57
+TYPE_IGNORE = 58
+TYPE_COMMENT = 59
+SOFT_KEYWORD = 60
+FSTRING_START = 61
+FSTRING_MIDDLE = 62
+FSTRING_END = 63
 # These aren't used by the C tokenizer but are needed for tokenize.py
-ERRORTOKEN = 60
-COMMENT = 61
-NL = 62
-ENCODING = 63
-N_TOKENS = 64
+ERRORTOKEN = 64
+COMMENT = 65
+NL = 66
+ENCODING = 67
+N_TOKENS = 68
 # Special definitions for cooperation with parser
 NT_OFFSET = 256

@ -78,6 +82,7 @@ tok_name = {value: name
 __all__.extend(tok_name.values())

 EXACT_TOKEN_TYPES = {
+    '!': EXCLAMATION,
    '!=': NOTEQUAL,
    '%': PERCENT,
    '%=': PERCENTEQUAL,
--- a/Builtins/2023-04-17-16-00-32.gh-issue-102856.UunJ7y.rst
+++ b/Builtins/2023-04-17-16-00-32.gh-issue-102856.UunJ7y.rst
@ -0,0 +1 @@
+Implement the required C tokenizer changes for PEP 701. Patch by Pablo Galindo Salgado, Lysandros Nikolaou, Batuhan Taskaya, Marta Gómez Macías and sunmy2019.
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@ -1,6 +1,7 @@
 #include <Python.h>

 #include "pegen.h"
+#include "tokenizer.h"
 #include "string_parser.h"
 #include "pycore_runtime.h"         // _PyRuntime

@ -853,96 +854,6 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
    return new_seq;
 }

-expr_ty
-_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
-{
-    Py_ssize_t len = asdl_seq_LEN(strings);
-    assert(len > 0);
-
-    Token *first = asdl_seq_GET_UNTYPED(strings, 0);
-    Token *last = asdl_seq_GET_UNTYPED(strings, len - 1);
-
-    int bytesmode = 0;
-    PyObject *bytes_str = NULL;
-
-    FstringParser state;
-    _PyPegen_FstringParser_Init(&state);
-
-    for (Py_ssize_t i = 0; i < len; i++) {
-        Token *t = asdl_seq_GET_UNTYPED(strings, i);
-
-        int this_bytesmode;
-        int this_rawmode;
-        PyObject *s;
-        const char *fstr;
-        Py_ssize_t fstrlen = -1;
-
-        if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
-            goto error;
-        }
-
-        /* Check that we are not mixing bytes with unicode. */
-        if (i != 0 && bytesmode != this_bytesmode) {
-            RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
-            Py_XDECREF(s);
-            goto error;
-        }
-        bytesmode = this_bytesmode;
-
-        if (fstr != NULL) {
-            assert(s == NULL && !bytesmode);
-
-            int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
-                                                     this_rawmode, 0, first, t, last);
-            if (result < 0) {
-                goto error;
-            }
-        }
-        else {
-            /* String or byte string. */
-            assert(s != NULL && fstr == NULL);
-            assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
-
-            if (bytesmode) {
-                if (i == 0) {
-                    bytes_str = s;
-                }
-                else {
-                    PyBytes_ConcatAndDel(&bytes_str, s);
-                    if (!bytes_str) {
-                        goto error;
-                    }
-                }
-            }
-            else {
-                /* This is a regular string. Concatenate it. */
-                if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
-                    goto error;
-                }
-            }
-        }
-    }
-
-    if (bytesmode) {
-        if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) {
-            goto error;
-        }
-        return _PyAST_Constant(bytes_str, NULL, first->lineno,
-                               first->col_offset, last->end_lineno,
-                               last->end_col_offset, p->arena);
-    }
-
-    return _PyPegen_FstringParser_Finish(p, &state, first, last);
-
-error:
-    Py_XDECREF(bytes_str);
-    _PyPegen_FstringParser_Dealloc(&state);
-    if (PyErr_Occurred()) {
-        _Pypegen_raise_decode_error(p);
-    }
-    return NULL;
-}
-
 expr_ty
 _PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
 {
@ -1054,6 +965,18 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
    return 0;
 }

+expr_ty
+_PyPegen_check_fstring_conversion(Parser *p, Token* symbol, expr_ty conv) {
+    /* Accept `conv` only when it starts on the line of `symbol` and at the
+       exact column where `symbol` ends; otherwise raise a SyntaxError that
+       spans from `symbol` to `conv`. */
+    int same_line = symbol->lineno == conv->lineno;
+    int adjacent = symbol->end_col_offset == conv->col_offset;
+    if (same_line && adjacent) {
+        return conv;
+    }
+    return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
+        symbol, conv,
+        "f-string: conversion type must come right after the exclamanation mark"
+    );
+}
+
+
 const char *
 _PyPegen_get_expr_name(expr_ty e)
 {
@ -1271,3 +1194,439 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq
        "Generator expression must be parenthesized"
    );
 }
+
+// Fstring stuff
+
+static expr_ty
+decode_fstring_buffer(Parser *p, int lineno, int col_offset, int end_lineno,
+                      int end_col_offset)
+{
+    /* Decode the expression text stored on the tokenizer mode currently
+       selected by tok_mode_stack_index and wrap it in a str Constant located
+       at the given range. Returns NULL when decoding fails or the object
+       cannot be registered with the parser arena. */
+    tokenizer_mode *mode = &(p->tok->tok_mode_stack[p->tok->tok_mode_stack_index]);
+    assert(mode->last_expr_buffer != NULL);
+    assert(mode->last_expr_size >= 0 && mode->last_expr_end >= 0);
+
+    /* Only the first (last_expr_size - last_expr_end) bytes are decoded. */
+    Py_ssize_t n_bytes = mode->last_expr_size - mode->last_expr_end;
+    PyObject *text = PyUnicode_DecodeUTF8(mode->last_expr_buffer, n_bytes, NULL);
+    if (text == NULL) {
+        return NULL;
+    }
+    if (_PyArena_AddPyObject(p->arena, text) < 0) {
+        Py_DECREF(text);
+        return NULL;
+    }
+
+    return _PyAST_Constant(text, NULL, lineno, col_offset, end_lineno, end_col_offset, p->arena);
+}
+
+static expr_ty
+_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant) {
+    /* Decode the text held by a str Constant through _PyPegen_decode_string
+       and return a new Constant carrying the same source location.
+       Returns NULL on failure. */
+    assert(PyUnicode_CheckExact(constant->v.Constant.value));
+
+    const char *raw_text = PyUnicode_AsUTF8(constant->v.Constant.value);
+    if (raw_text == NULL) {
+        return NULL;
+    }
+
+    /* A piece that is exactly "{{" or "}}" is decoded as a single byte. */
+    int is_doubled_brace = strcmp(raw_text, "{{") == 0 || strcmp(raw_text, "}}") == 0;
+    size_t n_bytes = is_doubled_brace ? 1 : strlen(raw_text);
+
+    /* Text containing no backslash is passed on flagged as raw. */
+    int raw_flag = is_raw || strchr(raw_text, '\\') == NULL;
+
+    PyObject *decoded = _PyPegen_decode_string(p, raw_flag, raw_text, n_bytes, NULL);
+    if (decoded == NULL) {
+        _Pypegen_raise_decode_error(p);
+        return NULL;
+    }
+    if (_PyArena_AddPyObject(p->arena, decoded) < 0) {
+        Py_DECREF(decoded);
+        return NULL;
+    }
+
+    return _PyAST_Constant(decoded, NULL,
+                           constant->lineno, constant->col_offset,
+                           constant->end_lineno, constant->end_col_offset,
+                           p->arena);
+}
+
+static asdl_expr_seq *
+unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
+{
+    /* The parser might put multiple f-string values into an individual
+     * JoinedStr node at the top level due to stuff like f-string debugging
+     * expressions. This function flattens those and promotes them to the
+     * upper level. Only simplifies AST, but the compiler already takes care
+     * of the regular output, so this is not necessary if you are not going
+     * to expose the output AST to Python level.
+     *
+     * Returns a new arena-allocated sequence, or NULL if that allocation
+     * fails. */
+
+    Py_ssize_t i, req_size, raw_size;
+
+    /* First pass: every nested JoinedStr is replaced by its values, so it
+       contributes len(values) slots instead of one. */
+    req_size = raw_size = asdl_seq_LEN(raw_expressions);
+    expr_ty expr;
+    for (i = 0; i < raw_size; i++) {
+        expr = asdl_seq_GET(raw_expressions, i);
+        if (expr->kind == JoinedStr_kind) {
+            req_size += asdl_seq_LEN(expr->v.JoinedStr.values) - 1;
+        }
+    }
+
+    asdl_expr_seq *expressions = _Py_asdl_expr_seq_new(req_size, p->arena);
+    if (expressions == NULL) {
+        /* Without this check the asdl_seq_SET calls below would write
+           through a NULL pointer when the allocation fails. */
+        return NULL;
+    }
+
+    /* Second pass: copy the elements, splicing nested values in place. */
+    Py_ssize_t raw_index, req_index = 0;
+    for (raw_index = 0; raw_index < raw_size; raw_index++) {
+        expr = asdl_seq_GET(raw_expressions, raw_index);
+        if (expr->kind == JoinedStr_kind) {
+            asdl_expr_seq *values = expr->v.JoinedStr.values;
+            for (Py_ssize_t n = 0; n < asdl_seq_LEN(values); n++) {
+                asdl_seq_SET(expressions, req_index, asdl_seq_GET(values, n));
+                req_index++;
+            }
+        } else {
+            asdl_seq_SET(expressions, req_index, expr);
+            req_index++;
+        }
+    }
+    return expressions;
+}
+
+expr_ty
+_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
+    /* Build the JoinedStr node for one f-string.
+     *
+     * `a` is the opening token (its bytes are scanned for 'r'/'R' to decide
+     * whether literal parts are raw), `raw_expressions` are the parsed
+     * middle parts, and `b` is the closing token that supplies the end
+     * location. Returns NULL on failure. */
+    asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
+    if (expr == NULL) {
+        /* Flattening failed (allocation error); do not read through NULL. */
+        return NULL;
+    }
+    Py_ssize_t n_items = asdl_seq_LEN(expr);
+
+    const char* quote_str = PyBytes_AsString(a->bytes);
+    if (quote_str == NULL) {
+        return NULL;
+    }
+    int is_raw = strpbrk(quote_str, "rR") != NULL;
+
+    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena);
+    if (seq == NULL) {
+        return NULL;
+    }
+
+    Py_ssize_t index = 0;
+    for (Py_ssize_t i = 0; i < n_items; i++) {
+        expr_ty item = asdl_seq_GET(expr, i);
+        if (item->kind == Constant_kind) {
+            item = _PyPegen_decode_fstring_part(p, is_raw, item);
+            if (item == NULL) {
+                return NULL;
+            }
+
+            /* Tokenizer emits string parts even when the underlying string
+            might become an empty value (e.g. FSTRING_MIDDLE with the value \\n)
+            so we need to check for them and simplify it here. */
+            if (PyUnicode_CheckExact(item->v.Constant.value)
+                && PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
+                continue;
+            }
+        }
+        asdl_seq_SET(seq, index++, item);
+    }
+
+    /* If empty parts were dropped, copy the survivors into a sequence of
+       exactly the right length so the node carries no unused slots. */
+    asdl_expr_seq *resized_exprs;
+    if (index != n_items) {
+        resized_exprs = _Py_asdl_expr_seq_new(index, p->arena);
+        if (resized_exprs == NULL) {
+            return NULL;
+        }
+        for (Py_ssize_t i = 0; i < index; i++) {
+            asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i));
+        }
+    }
+    else {
+        resized_exprs = seq;
+    }
+
+    return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
+                            b->end_lineno, b->end_col_offset,
+                            p->arena);
+}
+
+expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok) {
+    /* Convert the token's bytes with PyUnicode_FromString and wrap the
+       resulting str in a Constant node that spans the token. Returns NULL
+       on failure. */
+    char *token_text = PyBytes_AsString(tok->bytes);
+    PyObject *value = token_text != NULL ? PyUnicode_FromString(token_text) : NULL;
+    if (value == NULL) {
+        return NULL;
+    }
+
+    if (_PyArena_AddPyObject(p->arena, value) < 0) {
+        Py_DECREF(value);
+        return NULL;
+    }
+
+    return _PyAST_Constant(value, NULL,
+                           tok->lineno, tok->col_offset,
+                           tok->end_lineno, tok->end_col_offset,
+                           p->arena);
+}
+
+expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
+    /* Parse the token with _PyPegen_parse_string and wrap the result in a
+       Constant node spanning the token. When the token's text starts with
+       'u' the node's kind is the identifier "u"; otherwise it is NULL.
+       Returns NULL on failure. */
+    char *source_text = PyBytes_AsString(tok->bytes);
+    if (source_text == NULL) {
+        return NULL;
+    }
+
+    PyObject *value = _PyPegen_parse_string(p, tok);
+    if (value == NULL) {
+        _Pypegen_raise_decode_error(p);
+        return NULL;
+    }
+    if (_PyArena_AddPyObject(p->arena, value) < 0) {
+        Py_DECREF(value);
+        return NULL;
+    }
+
+    /* source_text was checked for NULL above, so only the prefix matters. */
+    PyObject *kind = NULL;
+    if (source_text[0] == 'u') {
+        kind = _PyPegen_new_identifier(p, "u");
+        if (kind == NULL) {
+            return NULL;
+        }
+    }
+
+    return _PyAST_Constant(value, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
+}
+
+expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, expr_ty conversion,
+                                 expr_ty format, int lineno, int col_offset, int end_lineno, int end_col_offset,
+                                 PyArena *arena) {
+    /* Build the AST for one f-string replacement field.
+     *
+     * expression: the expression inside the braces.
+     * debug:      non-NULL when the field is a debug field.
+     * conversion: optional Name node holding the conversion character.
+     * format:     optional format spec node.
+     *
+     * Returns a FormattedValue node, or, for debug fields, a JoinedStr made
+     * of the expression's source text followed by the FormattedValue.
+     * Returns NULL on failure. */
+    int conversion_val = -1;
+    if (conversion != NULL) {
+        assert(conversion->kind == Name_kind);
+        Py_UCS4 first = PyUnicode_READ_CHAR(conversion->v.Name.id, 0);
+
+        /* Only the single characters 's', 'r' and 'a' are accepted. */
+        if (PyUnicode_GET_LENGTH(conversion->v.Name.id) > 1 ||
+            !(first == 's' || first == 'r' || first == 'a')) {
+            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion,
+                                              "f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
+                                              conversion->v.Name.id);
+            return NULL;
+        }
+
+        conversion_val = Py_SAFE_DOWNCAST(first, Py_UCS4, int);
+    }
+    else if (debug && !format) {
+        /* If no conversion is specified, use !r for debug expressions */
+        conversion_val = (int)'r';
+    }
+
+    expr_ty formatted_value = _PyAST_FormattedValue(
+        expression, conversion_val, format,
+        lineno, col_offset, end_lineno,
+        end_col_offset, arena
+    );
+    if (formatted_value == NULL) {
+        /* Bail out here so a NULL node is never stored in the JoinedStr
+           built for debug fields below. */
+        return NULL;
+    }
+
+    if (debug) {
+        /* Find the non whitespace token after the "=" */
+        int debug_end_line, debug_end_offset;
+
+        if (conversion) {
+            debug_end_line = conversion->lineno;
+            debug_end_offset = conversion->col_offset;
+        }
+        else if (format) {
+            debug_end_line = format->lineno;
+            debug_end_offset = format->col_offset + 1; // HACK: ??
+        }
+        else {
+            debug_end_line = end_lineno;
+            debug_end_offset = end_col_offset;
+        }
+
+        expr_ty debug_text = decode_fstring_buffer(p, lineno, col_offset + 1,
+                                                   debug_end_line, debug_end_offset - 1);
+        if (!debug_text) {
+            return NULL;
+        }
+
+        asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
+        if (values == NULL) {
+            /* asdl_seq_SET would otherwise write through a NULL sequence. */
+            return NULL;
+        }
+        asdl_seq_SET(values, 0, debug_text);
+        asdl_seq_SET(values, 1, formatted_value);
+        return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
+    }
+    else {
+        return formatted_value;
+    }
+}
+
+/* Concatenate a non-empty sequence of adjacent string nodes into one node.
+ *
+ * Each element of `strings` is either a Constant (bytes or str) or is
+ * treated as a JoinedStr (an f-string).
+ *   - Mixing bytes with str/f-string elements raises a SyntaxError.
+ *   - If any element is bytes, the result is one bytes Constant whose kind
+ *     is that of the first element.
+ *   - A single non-f-string element is returned unchanged.
+ *   - Otherwise JoinedStr values are flattened and runs of adjacent
+ *     constants are folded into one. When an f-string is present, empty str
+ *     constants are dropped and the result is a JoinedStr; without one the
+ *     result is the single folded Constant.
+ *
+ * Returns NULL on failure. */
+expr_ty
+_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
+                             int lineno, int col_offset, int end_lineno,
+                             int end_col_offset, PyArena *arena)
+{
+    Py_ssize_t len = asdl_seq_LEN(strings);
+    assert(len > 0);
+
+    int f_string_found = 0;
+    int unicode_string_found = 0;
+    int bytes_found = 0;
+
+    /* Classify the elements and count the slots the flattened list needs. */
+    Py_ssize_t i = 0;
+    Py_ssize_t n_flattened_elements = 0;
+    for (i = 0; i < len; i++) {
+        expr_ty elem = asdl_seq_GET(strings, i);
+        if (elem->kind == Constant_kind) {
+            if (PyBytes_CheckExact(elem->v.Constant.value)) {
+                bytes_found = 1;
+            } else {
+                unicode_string_found = 1;
+            }
+            n_flattened_elements++;
+        } else {
+            n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
+            f_string_found = 1;
+        }
+    }
+
+    if ((unicode_string_found || f_string_found) && bytes_found) {
+        RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
+        return NULL;
+    }
+
+    if (bytes_found) {
+        PyObject* res = PyBytes_FromString("");
+
+        /* Bytes literals never get a kind, but just for consistency
+           since they are represented as Constant nodes, we'll mirror
+           the same behavior as unicode strings for determining the
+           kind. */
+        PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
+        for (i = 0; i < len; i++) {
+            expr_ty elem = asdl_seq_GET(strings, i);
+            PyBytes_Concat(&res, elem->v.Constant.value);
+        }
+        /* A failure in the creation or concatenation above is caught here. */
+        if (!res || _PyArena_AddPyObject(arena, res) < 0) {
+            Py_XDECREF(res);
+            return NULL;
+        }
+        return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
+    }
+
+    if (!f_string_found && len == 1) {
+        return asdl_seq_GET(strings, 0);
+    }
+
+    asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
+    if (flattened == NULL) {
+        return NULL;
+    }
+
+    /* build flattened list */
+    Py_ssize_t current_pos = 0;
+    Py_ssize_t j = 0;
+    for (i = 0; i < len; i++) {
+        expr_ty elem = asdl_seq_GET(strings, i);
+        if (elem->kind == Constant_kind) {
+            asdl_seq_SET(flattened, current_pos++, elem);
+        } else {
+            for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
+                expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
+                if (subvalue == NULL) {
+                    return NULL;
+                }
+                asdl_seq_SET(flattened, current_pos++, subvalue);
+            }
+        }
+    }
+
+    /* calculate folded element count */
+    Py_ssize_t n_elements = 0;
+    int prev_is_constant = 0;
+    for (i = 0; i < n_flattened_elements; i++) {
+        expr_ty elem = asdl_seq_GET(flattened, i);
+
+        /* The concatenation of a FormattedValue and an empty Constant should
+           lead to the FormattedValue itself. Thus, we will not take any empty
+           constants into account, just as in `_PyPegen_joined_str` */
+        if (f_string_found && elem->kind == Constant_kind &&
+            PyUnicode_CheckExact(elem->v.Constant.value) &&
+            PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
+            continue;
+
+        /* A run of adjacent constants counts once. */
+        if (!prev_is_constant || elem->kind != Constant_kind) {
+            n_elements++;
+        }
+        prev_is_constant = elem->kind == Constant_kind;
+    }
+
+    asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
+    if (values == NULL) {
+        return NULL;
+    }
+
+    /* build folded list */
+    _PyUnicodeWriter writer;
+    current_pos = 0;
+    for (i = 0; i < n_flattened_elements; i++) {
+        expr_ty elem = asdl_seq_GET(flattened, i);
+
+        /* if the current elem and the following are constants,
+           fold them and all subsequent constants */
+        if (elem->kind == Constant_kind) {
+            if (i + 1 < n_flattened_elements &&
+                asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) {
+                expr_ty first_elem = elem;
+
+                /* When a string is getting concatenated, the kind of the string
+                   is determined by the first string in the concatenation
+                   sequence.
+
+                   u"abc" "def" -> u"abcdef"
+                   "abc" u"abc" ->  "abcabc" */
+                PyObject *kind = elem->v.Constant.kind;
+
+                _PyUnicodeWriter_Init(&writer);
+                expr_ty last_elem = elem;
+                for (j = i; j < n_flattened_elements; j++) {
+                    expr_ty current_elem = asdl_seq_GET(flattened, j);
+                    if (current_elem->kind == Constant_kind) {
+                        if (_PyUnicodeWriter_WriteStr(
+                                &writer, current_elem->v.Constant.value)) {
+                            _PyUnicodeWriter_Dealloc(&writer);
+                            return NULL;
+                        }
+                        last_elem = current_elem;
+                    } else {
+                        break;
+                    }
+                }
+                /* Resume the outer loop right after the folded run. */
+                i = j - 1;
+
+                PyObject *concat_str = _PyUnicodeWriter_Finish(&writer);
+                if (concat_str == NULL) {
+                    _PyUnicodeWriter_Dealloc(&writer);
+                    return NULL;
+                }
+                if (_PyArena_AddPyObject(p->arena, concat_str) < 0) {
+                    Py_DECREF(concat_str);
+                    return NULL;
+                }
+                /* The folded constant spans from the first to the last
+                   constant of the run. */
+                elem = _PyAST_Constant(concat_str, kind, first_elem->lineno,
+                                       first_elem->col_offset,
+                                       last_elem->end_lineno,
+                                       last_elem->end_col_offset, p->arena);
+                if (elem == NULL) {
+                    return NULL;
+                }
+            }
+
+            /* Drop all empty constant strings */
+            if (f_string_found &&
+                PyUnicode_CheckExact(elem->v.Constant.value) &&
+                PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
+                continue;
+            }
+        }
+
+        asdl_seq_SET(values, current_pos++, elem);
+    }
+
+    /* Plain strings only: everything folded into exactly one Constant. */
+    if (!f_string_found) {
+        assert(n_elements == 1);
+        expr_ty elem = asdl_seq_GET(values, 0);
+        assert(elem->kind == Constant_kind);
+        return elem;
+    }
+
+    assert(current_pos == n_elements);
+    return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
+}
--- a/Parser/parser.c
+++ b/Parser/parser.c
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@ -138,6 +138,7 @@ void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expecte
 Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
 expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
 expr_ty _PyPegen_soft_keyword_token(Parser *p);
+expr_ty _PyPegen_fstring_middle_token(Parser* p);
 Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
 int _PyPegen_fill_token(Parser *p);
 expr_ty _PyPegen_name_token(Parser *p);
@ -155,7 +156,7 @@ typedef enum {
 int _Pypegen_raise_decode_error(Parser *p);
 void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
 int _Pypegen_tokenizer_error(Parser *p);
-void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
+void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...);
 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                          Py_ssize_t lineno, Py_ssize_t col_offset,
                                          Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
@ -175,8 +176,9 @@ RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
    va_end(va);
    return NULL;
 }
-#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
-#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
+#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 0, msg, ##__VA_ARGS__)
+#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, 0, msg, ##__VA_ARGS__)
+#define RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 1, msg, ##__VA_ARGS__)
 #define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (b)->end_lineno, (b)->end_col_offset, msg, ##__VA_ARGS__)
 #define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
@ -308,6 +310,7 @@ StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
 arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
                                     asdl_arg_seq *, asdl_seq *, StarEtc *);
 arguments_ty _PyPegen_empty_arguments(Parser *);
+expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, expr_ty, expr_ty, int, int, int, int, PyArena *);
 AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
 stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
 stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
@ -317,12 +320,16 @@ asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
 expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
                     int lineno, int col_offset, int end_lineno,
                     int end_col_offset, PyArena *arena);
-expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
+expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok);
+expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok);
+expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *, int, int, int, int, PyArena *);
+expr_ty _PyPegen_FetchRawForm(Parser *p, int, int, int, int);
 expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
 expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
+expr_ty _PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
 mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
 void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
 expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
@ -338,6 +345,9 @@ void *_PyPegen_run_parser(Parser *);
 mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
 asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);

+// TODO: move to the correct place in this file
+expr_ty _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b);
+
 // Generated function in parse.c - function definition in python.gram
 void *_PyPegen_parse(Parser *);

--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@ -192,7 +192,10 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {


 exit:
-    if (PyErr_Occurred()) {
+    // If we're in an f-string, we want the syntax error in the expression part
+    // to propagate, so that tokenizer errors (like expecting '}') that happen afterwards
+    // do not swallow it.
+    if (PyErr_Occurred() && p->tok->tok_mode_stack_index <= 0) {
        Py_XDECREF(value);
        Py_XDECREF(type);
        Py_XDECREF(traceback);
@ -205,7 +208,7 @@ exit:
 // PARSER ERRORS

 void *
-_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
+_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...)
 {
    if (p->fill == 0) {
        va_list va;
@ -214,8 +217,13 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
        va_end(va);
        return NULL;
    }
-
-    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+    if (use_mark && p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+    Token *t = p->known_err_token != NULL
+                   ? p->known_err_token
+                   : p->tokens[use_mark ? p->mark : p->fill - 1];
    Py_ssize_t col_offset;
    Py_ssize_t end_col_offset = -1;
    if (t->col_offset == -1) {
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
--- a/Parser/string_parser.h
+++ b/Parser/string_parser.h
@ -5,42 +5,7 @@
 #include <pycore_ast.h>
 #include "pegen.h"

-#define EXPRLIST_N_CACHED  64
-
-typedef struct {
-    /* Incrementally build an array of expr_ty, so be used in an
-       asdl_seq. Cache some small but reasonably sized number of
-       expr_ty's, and then after that start dynamically allocating,
-       doubling the number allocated each time. Note that the f-string
-       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
-       Constant for the literal 'a'. So you add expr_ty's about twice as
-       fast as you add expressions in an f-string. */
-
-    Py_ssize_t allocated;  /* Number we've allocated. */
-    Py_ssize_t size;       /* Number we've used. */
-    expr_ty    *p;         /* Pointer to the memory we're actually
-                              using. Will point to 'data' until we
-                              start dynamically allocating. */
-    expr_ty    data[EXPRLIST_N_CACHED];
-} ExprList;
-
-/* The FstringParser is designed to add a mix of strings and
-   f-strings, and concat them together as needed. Ultimately, it
-   generates an expr_ty. */
-typedef struct {
-    PyObject *last_str;
-    ExprList expr_list;
-    int fmode;
-} FstringParser;
-
-void _PyPegen_FstringParser_Init(FstringParser *);
-int _PyPegen_parsestr(Parser *, int *, int *, PyObject **,
-                      const char **, Py_ssize_t *, Token *);
-int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **,
-                                const char *, int, int, Token *, Token *,
-                                Token *);
-int _PyPegen_FstringParser_ConcatAndDel(FstringParser *, PyObject *);
-expr_ty _PyPegen_FstringParser_Finish(Parser *, FstringParser *, Token *, Token *);
-void _PyPegen_FstringParser_Dealloc(FstringParser *);
+PyObject *_PyPegen_parse_string(Parser *, Token *);
+PyObject *_PyPegen_decode_string(Parser *, int, const char *, size_t, Token *);

 #endif
--- a/Parser/token.c
+++ b/Parser/token.c
@ -60,12 +60,16 @@ const char * const _PyParser_TokenNames[] = {
    "RARROW",
    "ELLIPSIS",
    "COLONEQUAL",
+    "EXCLAMATION",
    "OP",
    "AWAIT",
    "ASYNC",
    "TYPE_IGNORE",
    "TYPE_COMMENT",
    "SOFT_KEYWORD",
+    "FSTRING_START",
+    "FSTRING_MIDDLE",
+    "FSTRING_END",
    "<ERRORTOKEN>",
    "<COMMENT>",
    "<NL>",
@ -79,6 +83,7 @@ int
 _PyToken_OneChar(int c1)
 {
    switch (c1) {
+    case '!': return EXCLAMATION;
    case '%': return PERCENT;
    case '&': return AMPER;
    case '(': return LPAR;
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@ -43,6 +43,28 @@
            tok->lineno++; \
            tok->col_offset = 0;

+/* Accessors for the tokenizer mode stack and for a mode's bracket marks.
+   Debug builds use functions that assert every index stays inside its
+   array; other builds use plain macros that perform the same indexing. */
+#ifdef Py_DEBUG
+/* Return the mode currently on top of the mode stack. */
+static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index < MAXLEVEL);
+    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
+}
+/* Advance the mode stack index by one and return the new top entry.
+   The index is incremented before it is used, so it is the incremented
+   value that has to fit in tok_mode_stack[MAXLEVEL]. */
+static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index + 1 < MAXLEVEL);
+    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
+}
+/* Return the bracket mark selected by mode->bracket_mark_index. */
+static inline int *TOK_GET_BRACKET_MARK(tokenizer_mode* mode) {
+    assert(mode->bracket_mark_index >= 0);
+    assert(mode->bracket_mark_index < MAX_EXPR_NESTING);
+    return &(mode->bracket_mark[mode->bracket_mark_index]);
+}
+#else
+#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
+#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
+#define TOK_GET_BRACKET_MARK(mode) (&((mode)->bracket_mark[(mode)->bracket_mark_index]))
+#endif
+
 /* Forward */
 static struct tok_state *tok_new(void);
 static int tok_nextc(struct tok_state *tok);
@ -98,6 +120,9 @@ tok_new(void)
    tok->interactive_underflow = IUNDERFLOW_NORMAL;
    tok->str = NULL;
    tok->report_warnings = 1;
+    tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0};
+    tok->tok_mode_stack_index = 0;
+    tok->tok_report_warnings = 1;
 #ifdef Py_DEBUG
    tok->debug = _Py_GetConfig()->parser_debug;
 #endif
@ -346,6 +371,92 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
 }


+/* Walk the mode stack from the current top down to entry 0 and add
+   `value` to the saved f-string pointers of each entry:
+     - f_string_start, when `regular` is non-zero;
+     - f_string_multi_line_start, when `multiline` is non-zero.
+   Pointers that are NULL are left untouched.
+
+   NOTE(review): the callers in this file pass `*tok->buf` (the first
+   character of the buffer) or its negation as `value`, not an address
+   difference.  That does not look like it rebases the pointers when the
+   buffer is reallocated -- confirm the intent. */
+static void
+update_fstring_buffers(struct tok_state *tok, char value, int regular, int multiline)
+{
+    for (int level = tok->tok_mode_stack_index; level >= 0; level--) {
+        tokenizer_mode *entry = &tok->tok_mode_stack[level];
+
+        if (regular && entry->f_string_start != NULL) {
+            entry->f_string_start += value;
+        }
+        if (multiline && entry->f_string_multi_line_start != NULL) {
+            entry->f_string_multi_line_start += value;
+        }
+    }
+}
+
+/* Maintain the current mode's copy of the source text of the f-string
+   expression being tokenized (last_expr_buffer / last_expr_size /
+   last_expr_end).
+
+   `cur` selects the action:
+     '{'          discard any previous copy and start a new one holding
+                  the NUL-terminated text that begins at tok->cur;
+     0            append the NUL-terminated text that begins at tok->cur,
+                  unless there is no copy or its end was already recorded;
+     '}' '!' ':'  record last_expr_end as strlen(tok->start), but only if
+                  it is still -1.
+   Any other value is ignored.  The copy is not NUL-terminated; its length
+   is kept in last_expr_size.
+
+   Returns 1 on success.  On allocation failure sets tok->done to E_NOMEM,
+   leaves last_expr_buffer NULL and returns 0. */
+static int
+update_fstring_expr(struct tok_state *tok, char cur)
+{
+    assert(tok->cur != NULL);
+
+    Py_ssize_t size = strlen(tok->cur);
+    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
+
+    switch (cur) {
+        case '{':
+            /* PyMem_Free(NULL) is a no-op, so no guard is needed. */
+            PyMem_Free(tok_mode->last_expr_buffer);
+            tok_mode->last_expr_buffer = PyMem_Malloc(size);
+            if (tok_mode->last_expr_buffer == NULL) {
+                tok_mode->last_expr_size = 0;
+                tok->done = E_NOMEM;
+                return 0;
+            }
+            tok_mode->last_expr_size = size;
+            tok_mode->last_expr_end = -1;
+            /* Exactly `size` bytes are wanted, without a terminator. */
+            memcpy(tok_mode->last_expr_buffer, tok->cur, size);
+            break;
+        case 0: {
+            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
+                return 1;
+            }
+            char *new_buffer = PyMem_Realloc(
+                tok_mode->last_expr_buffer,
+                tok_mode->last_expr_size + size
+            );
+            if (new_buffer == NULL) {
+                /* A failed realloc leaves the old block allocated.  Release
+                   it and clear the pointer so that later cleanup of the
+                   mode stack does not free it a second time. */
+                PyMem_Free(tok_mode->last_expr_buffer);
+                tok_mode->last_expr_buffer = NULL;
+                tok_mode->last_expr_size = 0;
+                tok->done = E_NOMEM;
+                return 0;
+            }
+            tok_mode->last_expr_buffer = new_buffer;
+            memcpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
+            tok_mode->last_expr_size += size;
+            break;
+        }
+        case '}':
+        case '!':
+        case ':':
+            if (tok_mode->last_expr_end == -1) {
+                tok_mode->last_expr_end = strlen(tok->start);
+            }
+            break;
+        default:
+            break;
+    }
+
+    return 1;
+}
+
+/* Release the saved expression buffer of every mode from the current top
+   of the mode stack down to entry 0.  Entries that own a buffer get their
+   bookkeeping reset (NULL buffer, size 0, end -1); entries without one are
+   left as they are. */
+static void
+free_fstring_expressions(struct tok_state *tok)
+{
+    for (int level = tok->tok_mode_stack_index; level >= 0; level--) {
+        tokenizer_mode *entry = &tok->tok_mode_stack[level];
+
+        if (entry->last_expr_buffer == NULL) {
+            continue;
+        }
+        PyMem_Free(entry->last_expr_buffer);
+        entry->last_expr_buffer = NULL;
+        entry->last_expr_size = 0;
+        entry->last_expr_end = -1;
+    }
+}
+
 /* Read a line of text from TOK into S, using the stream in TOK.
   Return NULL on failure, else S.

@ -372,6 +483,7 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
        Py_ssize_t start = tok->start == NULL ? -1 : tok->start - tok->buf;
        Py_ssize_t line_start = tok->start == NULL ? -1 : tok->line_start - tok->buf;
        Py_ssize_t multi_line_start = tok->multi_line_start - tok->buf;
+        update_fstring_buffers(tok, -*tok->buf, /*regular=*/1, /*multiline=*/1);
        newbuf = (char *)PyMem_Realloc(newbuf, newsize);
        if (newbuf == NULL) {
            tok->done = E_NOMEM;
@ -384,6 +496,7 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
        tok->start = start < 0 ? NULL : tok->buf + start;
        tok->line_start = line_start < 0 ? NULL : tok->buf + line_start;
        tok->multi_line_start = multi_line_start < 0 ? NULL : tok->buf + multi_line_start;
+        update_fstring_buffers(tok, *tok->buf, /*regular=*/1, /*multiline=*/1);
    }
    return 1;
 }
@ -838,6 +951,7 @@ _PyTokenizer_Free(struct tok_state *tok)
    if (tok->interactive_src_start != NULL) {
        PyMem_Free(tok->interactive_src_start);
    }
+    free_fstring_expressions(tok);
    PyMem_Free(tok);
 }

@ -854,6 +968,9 @@ tok_readline_raw(struct tok_state *tok)
        if (line == NULL) {
            return 1;
        }
+        if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
+            return 0;
+        }
        if (tok->fp_interactive &&
            tok_concatenate_interactive_new_line(tok, line) == -1) {
            return 0;
@ -941,6 +1058,7 @@ tok_underflow_interactive(struct tok_state *tok) {
    }
    else if (tok->start != NULL) {
        Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
+        update_fstring_buffers(tok, -*tok->buf, /*regular=*/0, /*multiline=*/1);
        size_t size = strlen(newtok);
        ADVANCE_LINENO();
        if (!tok_reserve_buf(tok, size + 1)) {
@ -953,6 +1071,7 @@ tok_underflow_interactive(struct tok_state *tok) {
        PyMem_Free(newtok);
        tok->inp += size;
        tok->multi_line_start = tok->buf + cur_multi_line_start;
+        update_fstring_buffers(tok, *tok->buf, /*regular=*/0, /*multiline=*/1);
    }
    else {
        ADVANCE_LINENO();
@ -969,6 +1088,10 @@ tok_underflow_interactive(struct tok_state *tok) {
        }
        return 0;
    }
+
+    if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
+        return 0;
+    }
    return 1;
 }

@ -1115,7 +1238,7 @@ tok_backup(struct tok_state *tok, int c)
        if (--tok->cur < tok->buf) {
            Py_FatalError("tokenizer beginning of buffer");
        }
-        if ((int)(unsigned char)*tok->cur != c) {
+        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
            Py_FatalError("tok_backup: wrong character");
        }
        tok->col_offset--;
@ -1172,6 +1295,7 @@ error:
 static int
 syntaxerror(struct tok_state *tok, const char *format, ...)
 {
+    // These errors are cleaned on startup. TODO: fix it.
    va_list vargs;
    va_start(vargs, format);
    int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
@ -1234,6 +1358,41 @@ error:
    return -1;
 }

+/* Issue a DeprecationWarning for the invalid escape sequence formed by a
+   backslash followed by first_invalid_escape_char.
+
+   Returns 0 when the warning was issued, and also when
+   tok->tok_report_warnings is zero (nothing is done in that case).
+   If issuing the warning raised DeprecationWarning, that exception is
+   cleared and replaced through syntaxerror(), whose result is returned.
+   Any other failure returns -1. */
+static int
+warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_char)
+{
+    if (!tok->tok_report_warnings) {
+        return 0;
+    }
+
+    const char escaped = (char) first_invalid_escape_char;
+    PyObject *text = PyUnicode_FromFormat("invalid escape sequence '\\%c'", escaped);
+    if (text == NULL) {
+        return -1;
+    }
+
+    int status = PyErr_WarnExplicitObject(PyExc_DeprecationWarning, text,
+                                          tok->filename, tok->lineno,
+                                          NULL, NULL);
+    /* The message object is no longer needed whatever the outcome. */
+    Py_DECREF(text);
+    if (status >= 0) {
+        return 0;
+    }
+    if (!PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
+        return -1;
+    }
+
+    /* Replace the DeprecationWarning exception with a SyntaxError
+       to get a more accurate error report */
+    PyErr_Clear();
+    return syntaxerror(tok, "invalid escape sequence '\\%c'", escaped);
+}
+
 static int
 lookahead(struct tok_state *tok, const char *test)
 {
@ -1389,7 +1548,6 @@ tok_decimal_tail(struct tok_state *tok)
    return c;
 }

-/* Get next token, after space stripping etc. */

 static inline int
 tok_continuation_line(struct tok_state *tok) {
@ -1427,7 +1585,12 @@ token_setup(struct tok_state *tok, struct token *token, int type, const char *st
 {
    assert((start == NULL && end == NULL) || (start != NULL && end != NULL));
    token->level = tok->level;
-    token->lineno = type == STRING ? tok->first_lineno : tok->lineno;
+    if (ISSTRINGLIT(type)) {
+        token->lineno = tok->first_lineno;
+    }
+    else {
+        token->lineno = tok->lineno;
+    }
    token->end_lineno = tok->lineno;
    token->col_offset = token->end_col_offset = -1;
    token->start = start;
@ -1441,7 +1604,7 @@ token_setup(struct tok_state *tok, struct token *token, int type, const char *st
 }

 static int
-tok_get(struct tok_state *tok, struct token *token)
+tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
 {
    int c;
    int blankline, nonascii;
@ -1602,6 +1765,11 @@ tok_get(struct tok_state *tok, struct token *token)

    /* Skip comment, unless it's a type comment */
    if (c == '#') {
+
+        if (tok->tok_mode_stack_index > 0) {
+            return MAKE_TOKEN(syntaxerror(tok, "f-string expression part cannot include '#'"));
+        }
+
        const char *prefix, *p, *type_start;
        int current_starting_col_offset;

@ -1703,6 +1871,9 @@ tok_get(struct tok_state *tok, struct token *token)
            }
            c = tok_nextc(tok);
            if (c == '"' || c == '\'') {
+                if (saw_f) {
+                    goto f_string_quote;
+                }
                goto letter_quote;
            }
        }
@ -1748,7 +1919,9 @@ tok_get(struct tok_state *tok, struct token *token)
                int ahead_tok_kind;

                memcpy(&ahead_tok, tok, sizeof(ahead_tok));
-                ahead_tok_kind = tok_get(&ahead_tok, &ahead_token);
+                ahead_tok_kind = tok_get_normal_mode(&ahead_tok,
+                                                     current_tok,
+                                                     &ahead_token);

                if (ahead_tok_kind == NAME
                    && ahead_tok.cur - ahead_tok.start == 3
@ -2003,6 +2176,67 @@ tok_get(struct tok_state *tok, struct token *token)
        return MAKE_TOKEN(NUMBER);
    }

+  f_string_quote:
+    if (((tolower(*tok->start) == 'f' || tolower(*tok->start) == 'r') && (c == '\'' || c == '"'))) {
+        int quote = c;
+        int quote_size = 1;             /* 1 or 3 */
+
+        /* Nodes of type STRING, especially multi line strings
+           must be handled differently in order to get both
+           the starting line number and the column offset right.
+           (cf. issue 16806) */
+        tok->first_lineno = tok->lineno;
+        tok->multi_line_start = tok->line_start;
+
+        /* Find the quote size and start of string */
+        int after_quote = tok_nextc(tok);
+        if (after_quote == quote) {
+            int after_after_quote = tok_nextc(tok);
+            if (after_after_quote == quote) {
+                quote_size = 3;
+            }
+            else {
+                // TODO: Check this
+                tok_backup(tok, after_after_quote);
+                tok_backup(tok, after_quote);
+            }
+        }
+        if (after_quote != quote) {
+            tok_backup(tok, after_quote);
+        }
+
+
+        p_start = tok->start;
+        p_end = tok->cur;
+        tokenizer_mode *current_tok = TOK_NEXT_MODE(tok);
+        current_tok->kind = TOK_FSTRING_MODE;
+        current_tok->f_string_quote = quote;
+        current_tok->f_string_quote_size = quote_size;
+        current_tok->f_string_start = tok->start;
+        current_tok->f_string_multi_line_start = tok->line_start;
+        current_tok->last_expr_buffer = NULL;
+        current_tok->last_expr_size = 0;
+        current_tok->last_expr_end = -1;
+
+        switch (*tok->start) {
+            case 'F':
+            case 'f':
+                current_tok->f_string_raw = tolower(*(tok->start + 1)) == 'r';
+                break;
+            case 'R':
+            case 'r':
+                current_tok->f_string_raw = 1;
+                break;
+            default:
+                Py_UNREACHABLE();
+        }
+
+        current_tok->bracket_stack = 0;
+        current_tok->bracket_mark[0] = 0;
+        current_tok->bracket_mark_index = -1;
+        return MAKE_TOKEN(FSTRING_START);
+    }
+
  letter_quote:
    /* String */
    if (c == '\'' || c == '"') {
@ -2047,6 +2281,20 @@ tok_get(struct tok_state *tok, struct token *token)
                tok->line_start = tok->multi_line_start;
                int start = tok->lineno;
                tok->lineno = tok->first_lineno;
+
+                if (tok->tok_mode_stack_index > 0) {
+                    /* When we are in an f-string, before raising the
+                     * unterminated string literal error, check whether
+                     * the initial quote matches the f-string's quotes;
+                     * if it does, this must be a missing '}' token,
+                     * so raise the proper error */
+                    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
+                    if (current_tok->f_string_quote == quote &&
+                        current_tok->f_string_quote_size == quote_size) {
+                        return MAKE_TOKEN(syntaxerror(tok, "f-string: expecting '}'", start));
+                    }
+                }
+
                if (quote_size == 3) {
                    syntaxerror(tok, "unterminated triple-quoted string literal"
                                     " (detected at line %d)", start);
@ -2089,6 +2337,27 @@ tok_get(struct tok_state *tok, struct token *token)
        goto again; /* Read next line */
    }

+    /* Punctuation character */
+    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
+    if (is_punctuation && tok->tok_mode_stack_index > 0 && current_tok->bracket_mark_index >= 0) {
+        int mark = *TOK_GET_BRACKET_MARK(current_tok);
+        /* This code block gets executed before the bracket_stack is incremented
+         * by the `{` case, so for ensuring that we are on the 0th level, we need
+         * to adjust it manually */
+        int cursor = current_tok->bracket_stack - (c != '{');
+
+        if (cursor == 0 && !update_fstring_expr(tok, c)) {
+            return MAKE_TOKEN(ENDMARKER);
+        }
+
+        if (c == ':' && cursor == mark) {
+            current_tok->kind = TOK_FSTRING_MODE;
+            p_start = tok->start;
+            p_end = tok->cur;
+            return MAKE_TOKEN(_PyToken_OneChar(c));
+        }
+    }
+
    /* Check for two-character token */
    {
        int c2 = tok_nextc(tok);
@ -2121,11 +2390,18 @@ tok_get(struct tok_state *tok, struct token *token)
        tok->parenlinenostack[tok->level] = tok->lineno;
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
        tok->level++;
+
+        if (tok->tok_mode_stack_index > 0) {
+            current_tok->bracket_stack++;
+        }
        break;
    case ')':
    case ']':
    case '}':
        if (!tok->level) {
+            if (tok->tok_mode_stack_index > 0 && !current_tok->bracket_stack && c == '}') {
+                return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed"));
+            }
            return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c));
        }
        tok->level--;
@ -2134,6 +2410,18 @@ tok_get(struct tok_state *tok, struct token *token)
              (opening == '[' && c == ']') ||
              (opening == '{' && c == '}')))
        {
+            /* If the opening bracket belongs to an f-string's expression
+               part (e.g. f"{)}") and the closing bracket is an arbitrary
+               nested expression, then instead of matching a different
+               syntactical construct with it; we'll throw an unmatched
+               parentheses error. */
+            if (tok->tok_mode_stack_index > 0 && opening == '{') {
+                assert(current_tok->bracket_stack >= 0);
+                int previous_bracket = current_tok->bracket_stack - 1;
+                if (previous_bracket == *TOK_GET_BRACKET_MARK(current_tok)) {
+                    return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c));
+                }
+            }
            if (tok->parenlinenostack[tok->level] != tok->lineno) {
                return MAKE_TOKEN(syntaxerror(tok,
                        "closing parenthesis '%c' does not match "
@ -2147,6 +2435,14 @@ tok_get(struct tok_state *tok, struct token *token)
                        c, opening));
            }
        }
+
+        if (tok->tok_mode_stack_index > 0) {
+            current_tok->bracket_stack--;
+            if (c == '}' && current_tok->bracket_stack == *TOK_GET_BRACKET_MARK(current_tok)) {
+                current_tok->bracket_mark_index--;
+                current_tok->kind = TOK_FSTRING_MODE;
+            }
+        }
        break;
    }

@ -2162,6 +2458,187 @@ tok_get(struct tok_state *tok, struct token *token)
    return MAKE_TOKEN(_PyToken_OneChar(c));
 }

+static int
+tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
+{
+    const char *p_start = NULL;
+    const char *p_end = NULL;
+    int end_quote_size = 0;
+    int unicode_escape = 0;
+
+    tok->start = tok->cur;
+    tok->first_lineno = tok->lineno;
+    tok->starting_col_offset = tok->col_offset;
+
+    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
+    // before it.
+    int start_char = tok_nextc(tok);
+    int peek1 = tok_nextc(tok);
+    tok_backup(tok, peek1);
+    tok_backup(tok, start_char);
+
+    if ((start_char == '{' && peek1 != '{') || (start_char == '}' && peek1 != '}')) {
+        if (start_char == '{') {
+            current_tok->bracket_mark_index++;
+            if (current_tok->bracket_mark_index >= MAX_EXPR_NESTING) {
+                return MAKE_TOKEN(syntaxerror(tok, "f-string: expressions nested too deeply"));
+            }
+            *TOK_GET_BRACKET_MARK(current_tok) = current_tok->bracket_stack;
+        }
+        TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+        return tok_get_normal_mode(tok, current_tok, token);
+    }
+
+    // Check if we are at the end of the string
+    for (int i = 0; i < current_tok->f_string_quote_size; i++) {
+        int quote = tok_nextc(tok);
+        if (quote != current_tok->f_string_quote) {
+            tok_backup(tok, quote);
+            goto f_string_middle;
+        }
+    }
+
+    if (current_tok->last_expr_buffer != NULL) {
+        PyMem_Free(current_tok->last_expr_buffer);
+        current_tok->last_expr_buffer = NULL;
+        current_tok->last_expr_size = 0;
+        current_tok->last_expr_end = -1;
+    }
+
+    p_start = tok->start;
+    p_end = tok->cur;
+    tok->tok_mode_stack_index--;
+    return MAKE_TOKEN(FSTRING_END);
+
+f_string_middle:
+
+    while (end_quote_size != current_tok->f_string_quote_size) {
+        int c = tok_nextc(tok);
+        if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+            assert(tok->multi_line_start != NULL);
+            // shift the tok_state's location into
+            // the start of string, and report the error
+            // from the initial quote character
+            tok->cur = (char *)current_tok->f_string_start;
+            tok->cur++;
+            tok->line_start = current_tok->f_string_multi_line_start;
+            int start = tok->lineno;
+            tok->lineno = tok->first_lineno;
+
+            if (current_tok->f_string_quote_size == 3) {
+                return MAKE_TOKEN(syntaxerror(tok,
+                                    "unterminated triple-quoted f-string literal"
+                                    " (detected at line %d)", start));
+            }
+            else {
+                return MAKE_TOKEN(syntaxerror(tok,
+                                    "unterminated f-string literal (detected at"
+                                    " line %d)", start));
+            }
+        }
+
+        if (c == current_tok->f_string_quote) {
+            end_quote_size += 1;
+            continue;
+        } else {
+            end_quote_size = 0;
+        }
+
+        int in_format_spec = current_tok->last_expr_end != -1 && current_tok->bracket_mark_index >= 0;
+        if (c == '{') {
+            int peek = tok_nextc(tok);
+            if (peek != '{' || in_format_spec) {
+                tok_backup(tok, peek);
+                tok_backup(tok, c);
+                current_tok->bracket_mark_index++;
+                if (current_tok->bracket_mark_index >= MAX_EXPR_NESTING) {
+                    return MAKE_TOKEN(syntaxerror(tok, "f-string: expressions nested too deeply"));
+                }
+                *TOK_GET_BRACKET_MARK(current_tok) = current_tok->bracket_stack;
+                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+                p_start = tok->start;
+                p_end = tok->cur;
+            } else {
+                p_start = tok->start;
+                p_end = tok->cur - 1;
+            }
+            return MAKE_TOKEN(FSTRING_MIDDLE);
+        } else if (c == '}') {
+            if (unicode_escape) {
+                p_start = tok->start;
+                p_end = tok->cur;
+                return MAKE_TOKEN(FSTRING_MIDDLE);
+            }
+            int peek = tok_nextc(tok);
+
+            // The tokenizer can only be in the format spec if we have already completed the expression
+            // scanning (indicated by the end of the expression being set) and we are not at the top level
+            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
+            // brackets, we can bypass it here.
+            if (peek == '}' && !in_format_spec) {
+                p_start = tok->start;
+                p_end = tok->cur - 1;
+            } else {
+                tok_backup(tok, peek);
+                tok_backup(tok, c);
+                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+                p_start = tok->start;
+                p_end = tok->cur;
+            }
+            return MAKE_TOKEN(FSTRING_MIDDLE);
+        } else if (c == '\\') {
+            int peek = tok_nextc(tok);
+            // Special case when the backslash is right before a curly
+            // brace. We have to restore and return the control back
+            // to the loop for the next iteration.
+            if (peek == '{' || peek == '}') {
+                if (!current_tok->f_string_raw) {
+                    if (warn_invalid_escape_sequence(tok, peek)) {
+                        return MAKE_TOKEN(ERRORTOKEN);
+                    }
+                }
+                tok_backup(tok, peek);
+                continue;
+            }
+
+            if (!current_tok->f_string_raw) {
+                if (peek == 'N') {
+                    /* Handle named unicode escapes (\N{BULLET}) */
+                    peek = tok_nextc(tok);
+                    if (peek == '{') {
+                        unicode_escape = 1;
+                    } else {
+                        tok_backup(tok, peek);
+                    }
+                }
+            } /* else {
+                skip the escaped character
+            }*/
+        }
+    }
+
+    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
+    // add the quotes to the FSTRING_END in the next tokenizer iteration.
+    for (int i = 0; i < current_tok->f_string_quote_size; i++) {
+        tok_backup(tok, current_tok->f_string_quote);
+    }
+    p_start = tok->start;
+    p_end = tok->cur;
+    return MAKE_TOKEN(FSTRING_MIDDLE);
+}
+
+
+/* Get the next token, dispatching on the kind of the tokenizer mode that
+   is currently on top of the mode stack. */
+static int
+tok_get(struct tok_state *tok, struct token *token)
+{
+    tokenizer_mode *mode = TOK_GET_MODE(tok);
+
+    if (mode->kind != TOK_REGULAR_MODE) {
+        return tok_get_fstring_mode(tok, mode, token);
+    }
+    return tok_get_normal_mode(tok, mode, token);
+}
+
 int
 _PyTokenizer_Get(struct tok_state *tok, struct token *token)
 {
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@ -33,6 +33,31 @@ struct token {
    const char *start, *end;
 };

+enum tokenizer_mode_kind_t {
+    TOK_REGULAR_MODE,  /* tokens come from tok_get_normal_mode() */
+    TOK_FSTRING_MODE,  /* tokens come from tok_get_fstring_mode() */
+};
+
+#define MAX_EXPR_NESTING 3  /* capacity of tokenizer_mode.bracket_mark */
+
+typedef struct _tokenizer_mode {
+    enum tokenizer_mode_kind_t kind;  /* selects the tok_get_* routine used for the next token */
+
+    int bracket_stack;  /* brackets currently open, counted while this mode is on top of the stack */
+    int bracket_mark[MAX_EXPR_NESTING];  /* bracket_stack values saved when a replacement field's '{' is reached */
+    int bracket_mark_index;  /* index of the innermost bracket_mark entry; -1 when there is none */
+
+    char f_string_quote;  /* quote character that opened the f-string */
+    int f_string_quote_size;  /* 1 or 3 */
+    int f_string_raw;  /* non-zero when the string prefix contains 'r' or 'R' */
+    const char* f_string_start;  /* tok->start when FSTRING_START was produced */
+    const char* f_string_multi_line_start;  /* tok->line_start when FSTRING_START was produced */
+
+    Py_ssize_t last_expr_size;  /* number of bytes stored in last_expr_buffer */
+    Py_ssize_t last_expr_end;  /* -1 until '}', '!' or ':' ends the expression; then strlen(tok->start) at that point */
+    char* last_expr_buffer;  /* PyMem-allocated copy of the expression's source text (not NUL-terminated), or NULL */
+} tokenizer_mode;
+
 /* Tokenizer state */
 struct tok_state {
    /* Input state; buf <= cur <= inp <= end */
@ -93,6 +118,10 @@ struct tok_state {
    /* How to proceed when asked for a new token in interactive mode */
    enum interactive_underflow_t interactive_underflow;
    int report_warnings;
+    // TODO: Factor this into its own thing
+    tokenizer_mode tok_mode_stack[MAXLEVEL];
+    int tok_mode_stack_index;
+    int tok_report_warnings;
 #ifdef Py_DEBUG
    int debug;
 #endif
--- a/Programs/test_frozenmain.h
+++ b/Programs/test_frozenmain.h
@ -27,12 +27,12 @@ unsigned char M_test_frozenmain[] = {
    218,3,107,101,121,169,0,243,0,0,0,0,250,18,116,101,
    115,116,95,102,114,111,122,101,110,109,97,105,110,46,112,121,
    250,8,60,109,111,100,117,108,101,62,114,18,0,0,0,1,
-    0,0,0,115,100,0,0,0,240,3,1,1,1,243,8,0,
+    0,0,0,115,102,0,0,0,240,3,1,1,1,243,8,0,
    1,11,219,0,24,225,0,5,208,6,26,212,0,27,217,0,
    5,128,106,144,35,151,40,145,40,212,0,27,216,9,38,208,
    9,26,215,9,38,209,9,38,211,9,40,168,24,209,9,50,
    128,6,240,2,6,12,2,242,0,7,1,42,128,67,241,14,
-    0,5,10,208,10,40,144,67,209,10,40,152,54,160,35,153,
-    59,209,10,40,213,4,41,241,15,7,1,42,114,16,0,0,
-    0,
+    0,5,10,136,71,144,67,144,53,152,2,152,54,160,35,153,
+    59,152,45,208,10,40,213,4,41,241,15,7,1,42,114,16,
+    0,0,0,
 };
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@ -86,8 +86,8 @@ tokenizeriter_next(tokenizeriterobject *it)
        Py_DECREF(str);
        return NULL;
    }
-    const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
-    int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
+    const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
+    int lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
    int end_lineno = it->tok->lineno;
    int col_offset = -1;
    int end_col_offset = -1;
--- a/Tools/build/generate_token.py
+++ b/Tools/build/generate_token.py
@ -80,6 +80,8 @@ extern "C" {
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)
+#define ISSTRINGLIT(x)          ((x) == STRING           || \\
+                                 (x) == FSTRING_MIDDLE)


 // Symbols exported for test_peg_generator
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@ -68,6 +68,7 @@ class NodeTypes(Enum):
    KEYWORD = 4
    SOFT_KEYWORD = 5
    CUT_OPERATOR = 6
+    F_STRING_CHUNK = 7


 BASE_NODETYPES = {
				`@ -0,0 +1 @@`
				`Implement the required C tokenizer changes for PEP 701. Patch by Pablo Galindo Salgado, Lysandros Nikolaou, Batuhan Taskaya, Marta Gómez Macías and sunmy2019.`