merge 3.4 (#24022)

2024-11-28 20:33:54 +08:00 · 2015-04-21 12:07:06 -04:00 · 2015-04-21 12:07:06 -04:00 · 273a720f87
commit 273a720f87
parent 8714cfdc4a d73aca769f
3 changed files with 18 additions and 5 deletions
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@ -1,9 +1,11 @@
 import math
 import os
 import unittest
 import sys
 import _ast
 import tempfile
 import types
-from test import support
+from test import support, script_helper
 class TestSpecifics(unittest.TestCase):
@ -492,6 +494,16 @@ if 1:
        self.assertInvalidSingle('f()\nxy # blah\nblah()')
        self.assertInvalidSingle('x = 5 # comment\nx = 6\n')
    def test_particularly_evil_undecodable(self):
        # Issue 24022: run a script whose bytes include NULs and a lone
        # 0x9e byte, and check that the interpreter reports a "Non-UTF-8"
        # error in the `err` field of the run result.
        payload = b'0000\x00\n00000000000\n\x00\n\x9e\n'
        with tempfile.TemporaryDirectory() as workdir:
            script_path = os.path.join(workdir, "bad.py")
            with open(script_path, "wb") as script:
                script.write(payload)
            # The child interpreter must finish while the temporary
            # directory (and therefore the script file) still exists.
            run = script_helper.run_python_until_end(script_path)
            outcome = run[0]
        self.assertIn(b"Non-UTF-8", outcome.err)
    @support.cpython_only
    def test_compiler_recursion_limit(self):
        # Expected limit is sys.getrecursionlimit() * the scaling factor
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -10,6 +10,8 @@ Release date: 2015-04-24
 Core and Builtins
 -----------------
 - Issue #24022: Fix tokenizer crash when processing undecodable source code.
 Library
 -------
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@ -1307,6 +1307,8 @@ verify_identifier(struct tok_state *tok)
 {
    PyObject *s;
    int result;
    if (tok->decoding_erred)
        return 0;
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
    if (s == NULL || PyUnicode_READY(s) == -1) {
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
@ -1475,11 +1477,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
            c = tok_nextc(tok);
        }
        tok_backup(tok, c);
-        if (nonascii &&
+        if (nonascii && !verify_identifier(tok))
-            !verify_identifier(tok)) {
-            tok->done = E_IDENTIFIER;
            return ERRORTOKEN;
-        }
        *p_start = tok->start;
        *p_end = tok->cur;
        return NAME;