mirror of
https://github.com/python/cpython.git
synced 2024-11-28 20:33:54 +08:00
Latin-1 source code was not being properly decoded when passed through
compile(). This was caused by special-casing left over from before UTF-8 became the default source encoding. Closes issue #3574. Thanks to Victor Stinner for help with the patch.
This commit is contained in:
parent
9e9dcd6d42
commit
da78043237
@ -23,8 +23,24 @@ class PEP3120Test(unittest.TestCase):
|
||||
else:
|
||||
self.fail("expected exception didn't occur")
|
||||
|
||||
|
||||
class BuiltinCompileTests(unittest.TestCase):
|
||||
|
||||
# Issue 3574.
|
||||
def test_latin1(self):
|
||||
# Allow compile() to read Latin-1 source.
|
||||
source_code = '# coding: Latin-1\nu = "Ç"\n'.encode("Latin-1")
|
||||
try:
|
||||
code = compile(source_code, '<dummy>', 'exec')
|
||||
except SyntaxError:
|
||||
self.fail("compile() cannot handle Latin-1 source")
|
||||
ns = {}
|
||||
exec(code, ns)
|
||||
self.assertEqual('Ç', ns['u'])
|
||||
|
||||
|
||||
def test_main():
|
||||
support.run_unittest(PEP3120Test)
|
||||
support.run_unittest(PEP3120Test, BuiltinCompileTests)
|
||||
|
||||
if __name__=="__main__":
|
||||
test_main()
|
||||
|
@ -15,6 +15,8 @@ What's New in Python 3.0 beta 5
|
||||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #3574: compile() incorrectly handled source code encoded as Latin-1.
|
||||
|
||||
- Issues #2384 and #3975: Tracebacks were not correctly printed when the
|
||||
source file contains a ``coding:`` header: the wrong line was displayed, and
|
||||
the encoding was not respected.
|
||||
|
@ -135,6 +135,7 @@ tok_new(void)
|
||||
tok->decoding_state = STATE_INIT;
|
||||
tok->decoding_erred = 0;
|
||||
tok->read_coding_spec = 0;
|
||||
tok->enc = NULL;
|
||||
tok->encoding = NULL;
|
||||
tok->cont_line = 0;
|
||||
#ifndef PGEN
|
||||
@ -274,8 +275,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
|
||||
tok->read_coding_spec = 1;
|
||||
if (tok->encoding == NULL) {
|
||||
assert(tok->decoding_state == STATE_RAW);
|
||||
if (strcmp(cs, "utf-8") == 0 ||
|
||||
strcmp(cs, "iso-8859-1") == 0) {
|
||||
if (strcmp(cs, "utf-8") == 0) {
|
||||
tok->encoding = cs;
|
||||
} else {
|
||||
r = set_readline(tok, cs);
|
||||
|
@ -49,14 +49,14 @@ struct tok_state {
|
||||
enum decoding_state decoding_state;
|
||||
int decoding_erred; /* whether erred in decoding */
|
||||
int read_coding_spec; /* whether 'coding:...' has been read */
|
||||
char *encoding;
|
||||
char *encoding; /* Source encoding. */
|
||||
int cont_line; /* whether we are in a continuation line. */
|
||||
const char* line_start; /* pointer to start of current line */
|
||||
#ifndef PGEN
|
||||
PyObject *decoding_readline; /* codecs.open(...).readline */
|
||||
PyObject *decoding_buffer;
|
||||
#endif
|
||||
const char* enc;
|
||||
const char* enc; /* Encoding for the current str. */
|
||||
const char* str;
|
||||
};
|
||||
|
||||
|
@ -3160,9 +3160,6 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
|
||||
if (encoding == NULL) {
|
||||
buf = (char *)s;
|
||||
u = NULL;
|
||||
} else if (strcmp(encoding, "iso-8859-1") == 0) {
|
||||
buf = (char *)s;
|
||||
u = NULL;
|
||||
} else {
|
||||
/* check for integer overflow */
|
||||
if (len > PY_SIZE_MAX / 4)
|
||||
@ -3275,8 +3272,7 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
|
||||
}
|
||||
}
|
||||
need_encoding = (!*bytesmode && c->c_encoding != NULL &&
|
||||
strcmp(c->c_encoding, "utf-8") != 0 &&
|
||||
strcmp(c->c_encoding, "iso-8859-1") != 0);
|
||||
strcmp(c->c_encoding, "utf-8") != 0);
|
||||
if (rawmode || strchr(s, '\\') == NULL) {
|
||||
if (need_encoding) {
|
||||
PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
|
||||
|
Loading…
Reference in New Issue
Block a user