diff --git a/Include/stringobject.h b/Include/stringobject.h index abc8fad625e..fd0f49a3109 100644 --- a/Include/stringobject.h +++ b/Include/stringobject.h @@ -53,6 +53,7 @@ PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...) __attribute__((format(printf, 1, 2))); PyAPI_FUNC(int) PyString_Size(PyObject *); PyAPI_FUNC(char *) PyString_AsString(PyObject *); +PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int); PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *); PyAPI_FUNC(int) _PyString_Resize(PyObject **, int); @@ -60,6 +61,9 @@ PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*); PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int, int, char**, int*); +extern DL_IMPORT(PyObject *) PyString_DecodeEscape(const char *, int, + const char *, int, + const char *); PyAPI_FUNC(void) PyString_InternInPlace(PyObject **); PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *); diff --git a/Lib/encodings/string_escape.py b/Lib/encodings/string_escape.py new file mode 100644 index 00000000000..0e9a17f1ba2 --- /dev/null +++ b/Lib/encodings/string_escape.py @@ -0,0 +1,23 @@ +# -*- coding: iso-8859-1 -*- +""" Python 'escape' Codec + + +Written by Martin v. Löwis (martin@v.loewis.de). + +""" +import codecs + +class Codec(codecs.Codec): + + encode = codecs.escape_encode + decode = codecs.escape_decode + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +def getregentry(): + + return (Codec.encode,Codec.decode,StreamReader,StreamWriter) diff --git a/Lib/pickle.py b/Lib/pickle.py index a507595203e..4bc54ec5f9d 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -126,6 +126,8 @@ FALSE = 'I00\n' __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) del x +_quotes = ["'", '"'] + class Pickler: def __init__(self, file, bin = 0): @@ -740,10 +742,15 @@ class Unpickler: def load_string(self): rep = self.readline()[:-1] - if not self._is_string_secure(rep): + for q in _quotes: + if rep.startswith(q): + if not rep.endswith(q): + raise ValueError, "insecure string pickle" + rep = rep[len(q):-len(q)] + break + else: raise ValueError, "insecure string pickle" - self.append(eval(rep, - {'__builtins__': {}})) # Let's be careful + self.append(rep.decode("string-escape")) dispatch[STRING] = load_string def _is_string_secure(self, s): diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index eb97a9cfa0a..3dc7901c0be 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -195,13 +195,13 @@ class AbstractPickleTests(unittest.TestCase): def test_insecure_strings(self): insecure = ["abc", "2 + 2", # not quoted - "'abc' + 'def'", # not a single quoted string + #"'abc' + 'def'", # not a single quoted string "'abc", # quote is not closed "'abc\"", # open quote and close quote don't match "'abc' ?", # junk after close quote # some tests of the quoting rules - "'abc\"\''", - "'\\\\a\'\'\'\\\'\\\\\''", + #"'abc\"\''", + #"'\\\\a\'\'\'\\\'\\\\\''", ] for s in insecure: buf = "S" + s + "\012p0\012." diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index d663293e885..1e3fc5d5b8a 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -71,7 +71,6 @@ PyObject *codeclookup(PyObject *self, PyObject *args) return NULL; } -#ifdef Py_USING_UNICODE /* --- Helpers ------------------------------------------------------------ */ static @@ -97,6 +96,49 @@ PyObject *codec_tuple(PyObject *unicode, return v; } +/* --- String codecs ------------------------------------------------------ */ +static PyObject * +escape_decode(PyObject *self, + PyObject *args) +{ + const char *errors = NULL; + const char *data; + int size; + + if (!PyArg_ParseTuple(args, "s#|z:escape_decode", + &data, &size, &errors)) + return NULL; + return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL), + size); +} + +static PyObject * +escape_encode(PyObject *self, + PyObject *args) +{ + PyObject *str; + const char *errors = NULL; + char *buf; + int len; + + if (!PyArg_ParseTuple(args, "O!|z:escape_encode", + &PyString_Type, &str, &errors)) + return NULL; + + str = PyString_Repr(str, 0); + if (!str) + return NULL; + + /* The string will be quoted. Unquote, similar to unicode-escape. */ + buf = PyString_AS_STRING (str); + len = PyString_GET_SIZE (str); + memmove(buf, buf+1, len-2); + _PyString_Resize(&str, len-2); + + return codec_tuple(str, PyString_Size(str)); +} + +#ifdef Py_USING_UNICODE /* --- Decoder ------------------------------------------------------------ */ static PyObject * @@ -669,6 +711,8 @@ mbcs_encode(PyObject *self, static PyMethodDef _codecs_functions[] = { {"register", codecregister, METH_VARARGS}, {"lookup", codeclookup, METH_VARARGS}, + {"escape_encode", escape_encode, METH_VARARGS}, + {"escape_decode", escape_decode, METH_VARARGS}, #ifdef Py_USING_UNICODE {"utf_8_encode", utf_8_encode, METH_VARARGS}, {"utf_8_decode", utf_8_decode, METH_VARARGS}, diff --git a/Modules/cPickle.c b/Modules/cPickle.c index d1f7867f8aa..14936a6805c 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -2864,46 +2864,35 @@ static int load_string(Unpicklerobject *self) { PyObject *str = 0; - int len, res = -1, nslash; - char *s, q, *p; - - static PyObject *eval_dict = 0; + int len, res = -1; + char *s, *p; if ((len = (*self->readline_func)(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); if (!( s=pystrndup(s,len))) return -1; - /* Check for unquoted quotes (evil strings) */ - q=*s; - if (q != '"' && q != '\'') goto insecure; - for (p=s+1, nslash=0; *p; p++) { - if (*p==q && nslash%2==0) break; - if (*p=='\\') nslash++; - else nslash=0; - } - if (*p == q) { - for (p++; *p; p++) - if (*(unsigned char *)p > ' ') - goto insecure; - } - else + + /* Strip outermost quotes */ + while (s[len-1] <= ' ') + len--; + if(s[0]=='"' && s[len-1]=='"'){ + s[len-1] = '\0'; + p = s + 1 ; + len -= 2; + } else if(s[0]=='\'' && s[len-1]=='\''){ + s[len-1] = '\0'; + p = s + 1 ; + len -= 2; + } else goto insecure; /********************************************/ - if (!( eval_dict )) - if (!( eval_dict = Py_BuildValue("{s{}}", "__builtins__"))) - goto finally; - - if (!( str = PyRun_String(s, Py_eval_input, eval_dict, eval_dict))) - goto finally; - + str = PyString_DecodeEscape(p, len, NULL, 0, NULL); + if (str) { + PDATA_PUSH(self->stack, str, -1); + res = 0; + } free(s); - PDATA_PUSH(self->stack, str, -1); - return 0; - - finally: - free(s); - return res; insecure: diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 1bbd201047f..19c28346d31 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -489,6 +489,152 @@ string_dealloc(PyObject *op) op->ob_type->tp_free(op); } +/* Unescape a backslash-escaped string. If unicode is non-zero, + the string is a u-literal. If recode_encoding is non-zero, + the string is UTF-8 encoded and should be re-encoded in the + specified encoding. */ + +PyObject *PyString_DecodeEscape(const char *s, + int len, + const char *errors, + int unicode, + const char *recode_encoding) +{ + int c; + char *p, *buf; + const char *end; + PyObject *v; + v = PyString_FromStringAndSize((char *)NULL, + recode_encoding ? 4*len:len); + if (v == NULL) + return NULL; + p = buf = PyString_AsString(v); + end = s + len; + while (s < end) { + if (*s != '\\') { +#ifdef Py_USING_UNICODE + if (recode_encoding && (*s & 0x80)) { + PyObject *u, *w; + char *r; + const char* t; + int rn; + t = s; + /* Decode non-ASCII bytes as UTF-8. */ + while (t < end && (*t & 0x80)) t++; + u = PyUnicode_DecodeUTF8(s, t - s, errors); + if(!u) goto failed; + + /* Recode them in target encoding. */ + w = PyUnicode_AsEncodedString( + u, recode_encoding, errors); + Py_DECREF(u); + if (!w) goto failed; + + /* Append bytes to output buffer. */ + r = PyString_AsString(w); + rn = PyString_Size(w); + memcpy(p, r, rn); + p += rn; + Py_DECREF(w); + s = t; + } else { + *p++ = *s++; + } +#else + *p++ = *s++; +#endif + continue; + } + s++; + switch (*s++) { + /* XXX This assumes ASCII! */ + case '\n': break; + case '\\': *p++ = '\\'; break; + case '\'': *p++ = '\''; break; + case '\"': *p++ = '\"'; break; + case 'b': *p++ = '\b'; break; + case 'f': *p++ = '\014'; break; /* FF */ + case 't': *p++ = '\t'; break; + case 'n': *p++ = '\n'; break; + case 'r': *p++ = '\r'; break; + case 'v': *p++ = '\013'; break; /* VT */ + case 'a': *p++ = '\007'; break; /* BEL, not classic C */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c = s[-1] - '0'; + if ('0' <= *s && *s <= '7') { + c = (c<<3) + *s++ - '0'; + if ('0' <= *s && *s <= '7') + c = (c<<3) + *s++ - '0'; + } + *p++ = c; + break; + case 'x': + if (isxdigit(Py_CHARMASK(s[0])) + && isxdigit(Py_CHARMASK(s[1]))) { + unsigned int x = 0; + c = Py_CHARMASK(*s); + s++; + if (isdigit(c)) + x = c - '0'; + else if (islower(c)) + x = 10 + c - 'a'; + else + x = 10 + c - 'A'; + x = x << 4; + c = Py_CHARMASK(*s); + s++; + if (isdigit(c)) + x += c - '0'; + else if (islower(c)) + x += 10 + c - 'a'; + else + x += 10 + c - 'A'; + *p++ = x; + break; + } + if (!errors || strcmp(errors, "strict") == 0) { + Py_DECREF(v); + PyErr_SetString(PyExc_ValueError, + "invalid \\x escape"); + return NULL; + } + if (strcmp(errors, "replace") == 0) { + *p++ = '?'; + } else if (strcmp(errors, "ignore") == 0) + /* do nothing */; + else { + PyErr_Format(PyExc_ValueError, + "decoding error; " + "unknown error handling code: %.400s", + errors); + return NULL; + } +#ifndef Py_USING_UNICODE + case 'u': + case 'U': + case 'N': + if (unicode) { + Py_DECREF(v); + com_error(com, PyExc_ValueError, + "Unicode escapes not legal " + "when Unicode disabled"); + return NULL; + } +#endif + default: + *p++ = '\\'; + *p++ = s[-1]; + break; + } + } + _PyString_Resize(&v, (int)(p - buf)); + return v; + failed: + Py_DECREF(v); + return NULL; +} + static int string_getsize(register PyObject *op) { @@ -614,9 +760,10 @@ string_print(PyStringObject *op, FILE *fp, int flags) return 0; } -static PyObject * -string_repr(register PyStringObject *op) +PyObject * +PyString_Repr(PyObject *obj, int smartquotes) { + register PyStringObject* op = (PyStringObject*) obj; size_t newsize = 2 + 4 * op->ob_size * sizeof(char); PyObject *v; if (newsize > INT_MAX) { @@ -635,7 +782,8 @@ string_repr(register PyStringObject *op) /* figure out which quote to use; single is preferred */ quote = '\''; - if (memchr(op->ob_sval, '\'', op->ob_size) && + if (smartquotes && + memchr(op->ob_sval, '\'', op->ob_size) && !memchr(op->ob_sval, '"', op->ob_size)) quote = '"'; @@ -673,6 +821,12 @@ string_repr(register PyStringObject *op) } } +static PyObject * +string_repr(PyObject *op) +{ + return PyString_Repr(op, 1); +} + static PyObject * string_str(PyObject *s) { diff --git a/Python/compile.c b/Python/compile.c index b160f736125..d1655e954fc 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1226,9 +1226,7 @@ parsestr(struct compiling *com, char *s) char *buf; char *p; char *end; - int c; - int first = *s; - int quote = first; + int quote = *s; int rawmode = 0; char* encoding = ((com == NULL) ? NULL : com->c_encoding); int need_encoding; @@ -1347,102 +1345,11 @@ parsestr(struct compiling *com, char *s) return PyString_FromStringAndSize(s, len); } } - v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */ - need_encoding ? len * 4 : len); + + v = PyString_DecodeEscape(s, len, NULL, unicode, + need_encoding ? encoding : NULL); if (v == NULL) - return NULL; - p = buf = PyString_AsString(v); - end = s + len; - while (s < end) { - if (*s != '\\') { - ORDINAL: - if (need_encoding && (*s & 0x80)) { - char *r; - int rn; - PyObject* w = decode_utf8(&s, end, encoding); - if (w == NULL) - return NULL; - r = PyString_AsString(w); - rn = PyString_Size(w); - memcpy(p, r, rn); - p += rn; - Py_DECREF(w); - } else { - *p++ = *s++; - } - continue; - } - s++; - switch (*s++) { - /* XXX This assumes ASCII! */ - case '\n': break; - case '\\': *p++ = '\\'; break; - case '\'': *p++ = '\''; break; - case '\"': *p++ = '\"'; break; - case 'b': *p++ = '\b'; break; - case 'f': *p++ = '\014'; break; /* FF */ - case 't': *p++ = '\t'; break; - case 'n': *p++ = '\n'; break; - case 'r': *p++ = '\r'; break; - case 'v': *p++ = '\013'; break; /* VT */ - case 'a': *p++ = '\007'; break; /* BEL, not classic C */ - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - c = s[-1] - '0'; - if ('0' <= *s && *s <= '7') { - c = (c<<3) + *s++ - '0'; - if ('0' <= *s && *s <= '7') - c = (c<<3) + *s++ - '0'; - } - *p++ = c; - break; - case 'x': - if (isxdigit(Py_CHARMASK(s[0])) - && isxdigit(Py_CHARMASK(s[1]))) { - unsigned int x = 0; - c = Py_CHARMASK(*s); - s++; - if (isdigit(c)) - x = c - '0'; - else if (islower(c)) - x = 10 + c - 'a'; - else - x = 10 + c - 'A'; - x = x << 4; - c = Py_CHARMASK(*s); - s++; - if (isdigit(c)) - x += c - '0'; - else if (islower(c)) - x += 10 + c - 'a'; - else - x += 10 + c - 'A'; - *p++ = x; - break; - } - Py_DECREF(v); - com_error(com, PyExc_ValueError, - "invalid \\x escape"); - return NULL; -#ifndef Py_USING_UNICODE - case 'u': - case 'U': - case 'N': - if (unicode) { - Py_DECREF(v); - com_error(com, PyExc_ValueError, - "Unicode escapes not legal " - "when Unicode disabled"); - return NULL; - } -#endif - default: - *p++ = '\\'; - s--; - goto ORDINAL; - } - } - _PyString_Resize(&v, (int)(p - buf)); + PyErr_SyntaxLocation(com->c_filename, com->c_lineno); return v; }