mirror of
https://github.com/python/cpython.git
synced 2024-11-24 02:15:30 +08:00
This patch changes the way the string .encode() method works slightly
and introduces a new method, .decode(). The major change is that the string .encode() method no longer tries to convert a Unicode object returned by the codec into a string; it passes the Unicode object along as-is. The same is now true for all other codec return types. The underlying C APIs were changed accordingly. Note that although this has the potential to break existing code, the chances are low: the conversion from Unicode previously used the default encoding, which is normally set to ASCII, rendering this auto-conversion mechanism useless for most Unicode encodings. The good news is that .encode() and .decode() are now much easier to use and that the door is open for better accessibility of the builtin codecs. As a demonstration of the new feature, the patch includes a few new codecs which allow string-to-string encoding and decoding (rot13, hex, zip, uu, base64). Written by Marc-Andre Lemburg. Copyright assigned to the PSF.
This commit is contained in:
parent
2e0a654f6e
commit
2d9204199f
@ -2326,30 +2326,44 @@ interned string object with the same value.
|
||||
int size,
|
||||
const char *encoding,
|
||||
const char *errors}
|
||||
Create a string object by decoding \var{size} bytes of the encoded
|
||||
buffer \var{s}. \var{encoding} and \var{errors} have the same meaning
|
||||
Creates an object by decoding \var{size} bytes of the encoded
|
||||
buffer \var{s} using the codec registered
|
||||
for \var{encoding}. \var{encoding} and \var{errors} have the same meaning
|
||||
as the parameters of the same name in the unicode() builtin
|
||||
function. The codec to be used is looked up using the Python codec
|
||||
registry. Returns \NULL{} in case an exception was raised by the
|
||||
codec.
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyString_Encode}{const Py_UNICODE *s,
|
||||
\begin{cfuncdesc}{PyObject*}{PyString_AsDecodedObject}{PyObject *str,
|
||||
const char *encoding,
|
||||
const char *errors}
|
||||
Decodes a string object by passing it to the codec registered
|
||||
for \var{encoding} and returns the result as Python
|
||||
object. \var{encoding} and \var{errors} have the same meaning as the
|
||||
parameters of the same name in the string .decode() method. The codec
|
||||
to be used is looked up using the Python codec registry. Returns
|
||||
\NULL{} in case an exception was raised by the codec.
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyString_Encode}{const char *s,
|
||||
int size,
|
||||
const char *encoding,
|
||||
const char *errors}
|
||||
Encodes the \ctype{Py_UNICODE} buffer of the given size and returns a
|
||||
Python string object. \var{encoding} and \var{errors} have the same
|
||||
Encodes the \ctype{char} buffer of the given size by passing it to
|
||||
the codec registered for \var{encoding} and returns a Python object.
|
||||
\var{encoding} and \var{errors} have the same
|
||||
meaning as the parameters of the same name in the string .encode()
|
||||
method. The codec to be used is looked up using the Python codec
|
||||
registry. Returns \NULL{} in case an exception was raised by the
|
||||
codec.
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyString_AsEncodedString}{PyObject *unicode,
|
||||
\begin{cfuncdesc}{PyObject*}{PyString_AsEncodedObject}{PyObject *str,
|
||||
const char *encoding,
|
||||
const char *errors}
|
||||
Encodes a string object and returns the result as Python string
|
||||
Encodes a string object using the codec registered
|
||||
for \var{encoding} and returns the result as Python
|
||||
object. \var{encoding} and \var{errors} have the same meaning as the
|
||||
parameters of the same name in the string .encode() method. The codec
|
||||
to be used is looked up using the Python codec registry. Returns
|
||||
|
@ -78,7 +78,7 @@ extern DL_IMPORT(void) _Py_ReleaseInternedStrings(void);
|
||||
|
||||
/* --- Generic Codecs ----------------------------------------------------- */
|
||||
|
||||
/* Create a string object by decoding the encoded string s of the
|
||||
/* Create an object by decoding the encoded string s of the
|
||||
given size. */
|
||||
|
||||
extern DL_IMPORT(PyObject*) PyString_Decode(
|
||||
@ -89,7 +89,7 @@ extern DL_IMPORT(PyObject*) PyString_Decode(
|
||||
);
|
||||
|
||||
/* Encodes a char buffer of the given size and returns a
|
||||
Python string object. */
|
||||
Python object. */
|
||||
|
||||
extern DL_IMPORT(PyObject*) PyString_Encode(
|
||||
const char *s, /* string char buffer */
|
||||
@ -98,15 +98,52 @@ extern DL_IMPORT(PyObject*) PyString_Encode(
|
||||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
/* Encodes a string object and returns the result as Python string
|
||||
/* Encodes a string object and returns the result as Python
|
||||
object. */
|
||||
|
||||
extern DL_IMPORT(PyObject*) PyString_AsEncodedObject(
|
||||
PyObject *str, /* string object */
|
||||
const char *encoding, /* encoding */
|
||||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
/* Encodes a string object and returns the result as Python string
|
||||
object.
|
||||
|
||||
If the codec returns a Unicode object, the object is converted
|
||||
back to a string using the default encoding.
|
||||
|
||||
DEPRECATED - use PyString_AsEncodedObject() instead. */
|
||||
|
||||
extern DL_IMPORT(PyObject*) PyString_AsEncodedString(
|
||||
PyObject *str, /* string object */
|
||||
const char *encoding, /* encoding */
|
||||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
/* Decodes a string object and returns the result as Python
|
||||
object. */
|
||||
|
||||
extern DL_IMPORT(PyObject*) PyString_AsDecodedObject(
|
||||
PyObject *str, /* string object */
|
||||
const char *encoding, /* encoding */
|
||||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
/* Decodes a string object and returns the result as Python string
|
||||
object.
|
||||
|
||||
If the codec returns a Unicode object, the object is converted
|
||||
back to a string using the default encoding.
|
||||
|
||||
DEPRECATED - use PyString_AsDecodedObject() instead. */
|
||||
|
||||
extern DL_IMPORT(PyObject*) PyString_AsDecodedString(
|
||||
PyObject *str, /* string object */
|
||||
const char *encoding, /* encoding */
|
||||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
/* Provides access to the internal data buffer and size of a string
|
||||
object or the default encoded version of an Unicode object. Passing
|
||||
NULL as *len parameter will force the string buffer to be
|
||||
|
@ -72,6 +72,14 @@ class UserString:
|
||||
def center(self, width):
    """Return a new instance of this object's class wrapping
    self.data.center(width)."""
    centered = self.data.center(width)
    return self.__class__(centered)
|
||||
def count(self, sub, start=0, end=sys.maxint):
|
||||
return self.data.count(sub, start, end)
|
||||
def decode(self, encoding=None, errors=None): # XXX improve this?
    """Decode self.data and wrap the result in a new instance of this
    object's class.

    The underlying decode() is called with only as many arguments as were
    supplied: none when encoding is false, just encoding when errors is
    false, otherwise both.  errors is ignored when encoding is false.
    """
    raw = self.data
    if not encoding:
        decoded = raw.decode()
    elif not errors:
        decoded = raw.decode(encoding)
    else:
        decoded = raw.decode(encoding, errors)
    return self.__class__(decoded)
|
||||
def encode(self, encoding=None, errors=None): # XXX improve this?
|
||||
if encoding:
|
||||
if errors:
|
||||
|
@ -79,4 +79,13 @@ aliases = {
|
||||
'tis260': 'tactis',
|
||||
'sjis': 'shift_jis',
|
||||
|
||||
# Content transfer/compression encodings
|
||||
'rot13': 'rot_13',
|
||||
'base64': 'base64_codec',
|
||||
'base_64': 'base64_codec',
|
||||
'zlib': 'zlib_codec',
|
||||
'zip': 'zlib_codec',
|
||||
'hex': 'hex_codec',
|
||||
'uu': 'uu_codec',
|
||||
|
||||
}
|
||||
|
60
Lib/encodings/base64_codec.py
Normal file
60
Lib/encodings/base64_codec.py
Normal file
@ -0,0 +1,60 @@
|
||||
""" Python 'base64_codec' Codec - base64 content transfer encoding
|
||||
|
||||
Unlike most of the other codecs which target Unicode, this codec
|
||||
will return Python string objects for both encode and decode.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
"""
|
||||
import codecs, base64
|
||||
|
||||
### Codec APIs
|
||||
|
||||
def base64_encode(input,errors='strict'):
|
||||
|
||||
""" Encodes the object input and returns a tuple (output
|
||||
object, length consumed).
|
||||
|
||||
errors defines the error handling to apply. It defaults to
|
||||
'strict' handling which is the only currently supported
|
||||
error handling for this codec.
|
||||
|
||||
"""
|
||||
assert errors == 'strict'
|
||||
output = base64.encodestring(input)
|
||||
return (output, len(input))
|
||||
|
||||
def base64_decode(input,errors='strict'):
|
||||
|
||||
""" Decodes the object input and returns a tuple (output
|
||||
object, length consumed).
|
||||
|
||||
input must be an object which provides the bf_getreadbuf
|
||||
buffer slot. Python strings, buffer objects and memory
|
||||
mapped files are examples of objects providing this slot.
|
||||
|
||||
errors defines the error handling to apply. It defaults to
|
||||
'strict' handling which is the only currently supported
|
||||
error handling for this codec.
|
||||
|
||||
"""
|
||||
assert errors == 'strict'
|
||||
output = base64.decodestring(input)
|
||||
return (output, len(input))
|
||||
|
||||
class Codec(codecs.Codec):

    """ Codec class exposing the module-level base64 functions as
        instance methods.
    """

    # A plain function stored as a class attribute becomes a method, so
    # "encode = base64_encode" would receive the instance itself as
    # `input` whenever it is called on an instance (as the stream classes
    # below do through self.encode / self.decode).  Explicit wrappers
    # discard `self` before delegating.

    def encode(self, input, errors='strict'):
        """ Return base64_encode(input, errors). """
        return base64_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Return base64_decode(input, errors). """
        return base64_decode(input, errors)

class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the 4-tuple (encoder, decoder, StreamReader, StreamWriter)
        describing this codec.
    """
    encoder, decoder = base64_encode, base64_decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
60
Lib/encodings/hex_codec.py
Normal file
60
Lib/encodings/hex_codec.py
Normal file
@ -0,0 +1,60 @@
|
||||
""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding
|
||||
|
||||
Unlike most of the other codecs which target Unicode, this codec
|
||||
will return Python string objects for both encode and decode.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
"""
|
||||
import codecs, binascii
|
||||
|
||||
### Codec APIs
|
||||
|
||||
def hex_encode(input, errors='strict'):

    """ Encode input as 2-digit hex and return the pair
        (encoded data, number of input items consumed).

        errors selects the error handling scheme; only 'strict'
        (the default) is accepted by this codec.

    """
    assert errors == 'strict'
    return (binascii.b2a_hex(input), len(input))
|
||||
|
||||
def hex_decode(input, errors='strict'):

    """ Decode 2-digit hex input and return the pair
        (decoded data, number of input items consumed).

        input must be an object providing the bf_getreadbuf buffer
        slot, e.g. Python strings, buffer objects or memory mapped
        files.

        errors selects the error handling scheme; only 'strict'
        (the default) is accepted by this codec.

    """
    assert errors == 'strict'
    return (binascii.a2b_hex(input), len(input))
|
||||
|
||||
class Codec(codecs.Codec):

    """ Codec class exposing the module-level hex functions as
        instance methods.
    """

    # A plain function stored as a class attribute becomes a method, so
    # "encode = hex_encode" would receive the instance itself as `input`
    # whenever it is called on an instance (as the stream classes below
    # do through self.encode / self.decode).  Explicit wrappers discard
    # `self` before delegating.

    def encode(self, input, errors='strict'):
        """ Return hex_encode(input, errors). """
        return hex_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Return hex_decode(input, errors). """
        return hex_decode(input, errors)

class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the 4-tuple (encoder, decoder, StreamReader, StreamWriter)
        describing this codec.
    """
    encoder, decoder = hex_encode, hex_decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
107
Lib/encodings/rot_13.py
Normal file
107
Lib/encodings/rot_13.py
Normal file
@ -0,0 +1,107 @@
|
||||
#!/usr/local/bin/python2.1
|
||||
""" Python Character Mapping Codec for ROT13.
|
||||
|
||||
See http://ucsub.colorado.edu/~kominek/rot13/ for details.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ ROT13 codec built on the charmap helpers and the module-level
        encoding_map / decoding_map tables.
    """

    def encode(self, input, errors='strict'):
        """ Return codecs.charmap_encode() applied with encoding_map. """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Return codecs.charmap_decode() applied with decoding_map. """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ Combines the ROT13 Codec with codecs.StreamWriter; defines
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ Combines the ROT13 Codec with codecs.StreamReader; defines
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the 4-tuple (encoder, decoder, StreamReader, StreamWriter)
        describing this codec.  The encoder and decoder are bound methods
        of two separate, freshly created Codec instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Start from the identity mapping over all 256 byte values, then rotate
# the 26 upper-case (from 0x41) and 26 lower-case (from 0x61) letters by
# 13 positions within their own alphabet.
decoding_map = codecs.make_identity_dict(range(256))
for _first in (0x0041, 0x0061):
    for _offset in range(26):
        decoding_map[_first + _offset] = _first + (_offset + 13) % 26
del _first, _offset
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert the decoding table: every decoded value maps back to the value
# it was decoded from.
encoding_map = {}
for source, target in decoding_map.items():
    encoding_map[target] = source
|
||||
|
||||
### Filter API
|
||||
|
||||
def rot13(infile, outfile):
    """ Read everything from infile, encode it with the 'rot-13' codec
        and write the result to outfile.
    """
    text = infile.read()
    outfile.write(text.encode('rot-13'))
|
||||
|
||||
# When executed as a script, act as a filter: ROT13 everything read from
# standard input and write it to standard output.
if __name__ == '__main__':
    import sys
    rot13(sys.stdin, sys.stdout)
|
110
Lib/encodings/uu_codec.py
Normal file
110
Lib/encodings/uu_codec.py
Normal file
@ -0,0 +1,110 @@
|
||||
""" Python 'uu_codec' Codec - UU content transfer encoding
|
||||
|
||||
Unlike most of the other codecs which target Unicode, this codec
|
||||
will return Python string objects for both encode and decode.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were
|
||||
adapted from uu.py which was written by Lance Ellinghouse and
|
||||
modified by Jack Jansen and Fredrik Lundh.
|
||||
|
||||
"""
|
||||
import codecs, binascii
|
||||
|
||||
### Codec APIs
|
||||
|
||||
def uu_encode(input,errors='strict',filename='<data>',mode=0666):
|
||||
|
||||
""" Encodes the object input and returns a tuple (output
|
||||
object, length consumed).
|
||||
|
||||
errors defines the error handling to apply. It defaults to
|
||||
'strict' handling which is the only currently supported
|
||||
error handling for this codec.
|
||||
|
||||
"""
|
||||
assert errors == 'strict'
|
||||
from cStringIO import StringIO
|
||||
from binascii import b2a_uu
|
||||
infile = StringIO(input)
|
||||
outfile = StringIO()
|
||||
read = infile.read
|
||||
write = outfile.write
|
||||
|
||||
# Encode
|
||||
write('begin %o %s\n' % (mode & 0777, filename))
|
||||
chunk = read(45)
|
||||
while chunk:
|
||||
write(b2a_uu(chunk))
|
||||
chunk = read(45)
|
||||
write(' \nend\n')
|
||||
|
||||
return (outfile.getvalue(), len(input))
|
||||
|
||||
def uu_decode(input,errors='strict'):
|
||||
|
||||
""" Decodes the object input and returns a tuple (output
|
||||
object, length consumed).
|
||||
|
||||
input must be an object which provides the bf_getreadbuf
|
||||
buffer slot. Python strings, buffer objects and memory
|
||||
mapped files are examples of objects providing this slot.
|
||||
|
||||
errors defines the error handling to apply. It defaults to
|
||||
'strict' handling which is the only currently supported
|
||||
error handling for this codec.
|
||||
|
||||
Note: filename and file mode information in the input data is
|
||||
ignored.
|
||||
|
||||
"""
|
||||
assert errors == 'strict'
|
||||
from cStringIO import StringIO
|
||||
from binascii import a2b_uu
|
||||
infile = StringIO(input)
|
||||
outfile = StringIO()
|
||||
readline = infile.readline
|
||||
write = outfile.write
|
||||
|
||||
# Find start of encoded data
|
||||
while 1:
|
||||
s = readline()
|
||||
if not s:
|
||||
raise ValueError, 'Missing "begin" line in input data'
|
||||
if s[:5] == 'begin':
|
||||
break
|
||||
|
||||
# Decode
|
||||
while 1:
|
||||
s = readline()
|
||||
if not s or \
|
||||
s == 'end\n':
|
||||
break
|
||||
try:
|
||||
data = a2b_uu(s)
|
||||
except binascii.Error, v:
|
||||
# Workaround for broken uuencoders by /Fredrik Lundh
|
||||
nbytes = (((ord(s[0])-32) & 63) * 4 + 5) / 3
|
||||
data = a2b_uu(s[:nbytes])
|
||||
#sys.stderr.write("Warning: %s\n" % str(v))
|
||||
write(data)
|
||||
if not s:
|
||||
raise ValueError, 'Truncated input data'
|
||||
|
||||
return (outfile.getvalue(), len(input))
|
||||
|
||||
class Codec(codecs.Codec):

    """ Codec class exposing the module-level UU functions as instance
        methods.  Encoding through this class always uses uu_encode()'s
        default filename and mode.
    """

    # A plain function stored as a class attribute becomes a method, so
    # "encode = uu_encode" would receive the instance itself as `input`
    # whenever it is called on an instance (as the stream classes below
    # do through self.encode / self.decode).  Explicit wrappers discard
    # `self` before delegating.

    def encode(self, input, errors='strict'):
        """ Return uu_encode(input, errors). """
        return uu_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Return uu_decode(input, errors). """
        return uu_decode(input, errors)

class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the 4-tuple (encoder, decoder, StreamReader, StreamWriter)
        describing this codec.
    """
    encoder, decoder = uu_encode, uu_decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
61
Lib/encodings/zlib_codec.py
Normal file
61
Lib/encodings/zlib_codec.py
Normal file
@ -0,0 +1,61 @@
|
||||
""" Python 'zlib_codec' Codec - zlib compression encoding
|
||||
|
||||
Unlike most of the other codecs which target Unicode, this codec
|
||||
will return Python string objects for both encode and decode.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
"""
|
||||
import codecs
|
||||
import zlib # this codec needs the optional zlib module !
|
||||
|
||||
### Codec APIs
|
||||
|
||||
def zlib_encode(input, errors='strict'):

    """ Compress input with zlib and return the pair
        (compressed data, number of input items consumed).

        errors selects the error handling scheme; only 'strict'
        (the default) is accepted by this codec.

    """
    assert errors == 'strict'
    return (zlib.compress(input), len(input))
|
||||
|
||||
def zlib_decode(input, errors='strict'):

    """ Decompress zlib data and return the pair
        (decompressed data, number of input items consumed).

        input must be an object providing the bf_getreadbuf buffer
        slot, e.g. Python strings, buffer objects or memory mapped
        files.

        errors selects the error handling scheme; only 'strict'
        (the default) is accepted by this codec.

    """
    assert errors == 'strict'
    return (zlib.decompress(input), len(input))
|
||||
|
||||
class Codec(codecs.Codec):

    """ Codec class exposing the module-level zlib functions as
        instance methods.
    """

    # A plain function stored as a class attribute becomes a method, so
    # "encode = zlib_encode" would receive the instance itself as `input`
    # whenever it is called on an instance (as the stream classes below
    # do through self.encode / self.decode).  Explicit wrappers discard
    # `self` before delegating.

    def encode(self, input, errors='strict'):
        """ Return zlib_encode(input, errors). """
        return zlib_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Return zlib_decode(input, errors). """
        return zlib_decode(input, errors)

class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the 4-tuple (encoder, decoder, StreamReader, StreamWriter)
        describing this codec.
    """
    encoder, decoder = zlib_encode, zlib_decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
@ -1,6 +1,7 @@
|
||||
"""Common tests shared by test_string and test_userstring"""
|
||||
|
||||
import string
|
||||
from test_support import verify, verbose, TestFailed
|
||||
|
||||
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
|
||||
|
||||
@ -212,3 +213,21 @@ def run_method_tests(test):
|
||||
test('endswith', 'helloworld', 0, 'lowo', 3, 8)
|
||||
test('endswith', 'ab', 0, 'ab', 0, 1)
|
||||
test('endswith', 'ab', 0, 'ab', 0, 0)
|
||||
|
||||
# Encoding/decoding
|
||||
codecs = [('rot13', 'uryyb jbeyq'),
|
||||
('base64', 'aGVsbG8gd29ybGQ=\n'),
|
||||
('hex', '68656c6c6f20776f726c64'),
|
||||
('uu', 'begin 666 <data>\n+:&5L;&\\@=V]R;&0 \n \nend\n')]
|
||||
for encoding, data in codecs:
|
||||
test('encode', 'hello world', data, encoding)
|
||||
test('decode', data, 'hello world', encoding)
|
||||
# zlib is optional, so we make the test optional too...
|
||||
try:
|
||||
import zlib
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
|
||||
verify('hello world'.encode('zlib') == data)
|
||||
verify(data.decode('zlib') == 'hello world')
|
||||
|
@ -152,38 +152,70 @@ PyObject *PyString_Decode(const char *s,
|
||||
const char *encoding,
|
||||
const char *errors)
|
||||
{
|
||||
PyObject *buffer = NULL, *str;
|
||||
PyObject *v, *str;
|
||||
|
||||
str = PyString_FromStringAndSize(s, size);
|
||||
if (str == NULL)
|
||||
return NULL;
|
||||
v = PyString_AsDecodedString(str, encoding, errors);
|
||||
Py_DECREF(str);
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Decode the string object str with the codec registered for encoding
   and return whatever object the codec produces.  A NULL encoding
   selects PyUnicode_GetDefaultEncoding().  Returns NULL with an
   exception set if str is not a string object or if decoding fails. */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *v;

    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        goto onError;
    }

    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Decode via the codec registry; the result is passed through
       untouched, whatever its type. */
    v = PyCodec_Decode(str, encoding, errors);
    if (v == NULL)
        goto onError;

    return v;

 onError:
    return NULL;
}
|
||||
|
||||
/* Decode the string object str via PyString_AsDecodedObject() and insist
   on a string result: a Unicode result is converted to a string using
   the default encoding, any other non-string result raises TypeError.
   Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *v;

    v = PyString_AsDecodedObject(str, encoding, errors);
    if (v == NULL)
        goto onError;

    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(v)) {
        PyObject *temp = v;
        v = PyUnicode_AsEncodedString(v, NULL, NULL);
        Py_DECREF(temp);
        if (v == NULL)
            goto onError;
    }
    if (!PyString_Check(v)) {
        PyErr_Format(PyExc_TypeError,
                     "decoder did not return a string object (type=%.400s)",
                     v->ob_type->tp_name);
        Py_DECREF(v);
        goto onError;
    }

    return v;

 onError:
    /* Nothing to release here: every failing path above has already
       dropped its reference to v. */
    return NULL;
}
|
||||
|
||||
@ -202,7 +234,7 @@ PyObject *PyString_Encode(const char *s,
|
||||
return v;
|
||||
}
|
||||
|
||||
PyObject *PyString_AsEncodedString(PyObject *str,
|
||||
PyObject *PyString_AsEncodedObject(PyObject *str,
|
||||
const char *encoding,
|
||||
const char *errors)
|
||||
{
|
||||
@ -220,6 +252,23 @@ PyObject *PyString_AsEncodedString(PyObject *str,
|
||||
v = PyCodec_Encode(str, encoding, errors);
|
||||
if (v == NULL)
|
||||
goto onError;
|
||||
|
||||
return v;
|
||||
|
||||
onError:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *v;

    /* Delegate to the object-returning variant.  This must not be
       PyString_AsEncodedString(): calling this function from itself
       would recurse without bound. */
    v = PyString_AsEncodedObject(str, encoding, errors);
    if (v == NULL)
        goto onError;

    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(v)) {
        PyObject *temp = v;
|
||||
@ -235,6 +284,7 @@ PyObject *PyString_AsEncodedString(PyObject *str,
|
||||
Py_DECREF(v);
|
||||
goto onError;
|
||||
}
|
||||
|
||||
return v;
|
||||
|
||||
onError:
|
||||
@ -1779,10 +1829,10 @@ string_endswith(PyStringObject *self, PyObject *args)
|
||||
|
||||
|
||||
/* Docstring of the string .encode() method.  The result is documented as
   "object" because the codec's return value is passed through as-is. */
static char encode__doc__[] =
"S.encode([encoding[,errors]]) -> object\n\
\n\
Encodes S using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
a ValueError. Other possible values are 'ignore' and 'replace'.";
|
||||
|
||||
@ -1793,7 +1843,26 @@ string_encode(PyStringObject *self, PyObject *args)
|
||||
char *errors = NULL;
|
||||
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
|
||||
return NULL;
|
||||
return PyString_AsEncodedString((PyObject *)self, encoding, errors);
|
||||
return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
|
||||
}
|
||||
|
||||
|
||||
/* Docstring of the string .decode() method. */
static char decode__doc__[] =
    "S.decode([encoding[,errors]]) -> object\n"
    "\n"
    "Decodes S using the codec registered for encoding. encoding defaults\n"
    "to the default encoding. errors may be given to set a different error\n"
    "handling scheme. Default is 'strict' meaning that encoding errors raise\n"
    "a ValueError. Other possible values are 'ignore' and 'replace'.";
|
||||
|
||||
static PyObject *
|
||||
string_decode(PyStringObject *self, PyObject *args)
|
||||
{
|
||||
char *encoding = NULL;
|
||||
char *errors = NULL;
|
||||
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
|
||||
return NULL;
|
||||
return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
|
||||
}
|
||||
|
||||
|
||||
@ -2371,6 +2440,7 @@ string_methods[] = {
|
||||
{"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
|
||||
{"center", (PyCFunction)string_center, 1, center__doc__},
|
||||
{"encode", (PyCFunction)string_encode, 1, encode__doc__},
|
||||
{"decode", (PyCFunction)string_decode, 1, decode__doc__},
|
||||
{"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
|
||||
{"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
|
||||
#if 0
|
||||
|
Loading…
Reference in New Issue
Block a user