mirror of
https://github.com/python/cpython.git
synced 2024-11-27 03:45:08 +08:00
gh-126997: Fix support of non-ASCII strings in pickletools (GH-127062)
* Fix support of STRING and GLOBAL opcodes with non-ASCII arguments.
* dis() now outputs non-ASCII bytes in STRING, BINSTRING and SHORT_BINSTRING arguments as escaped (\xXX).
This commit is contained in:
parent
ff2278e2bf
commit
eaf2171082
@ -312,7 +312,7 @@ uint8 = ArgumentDescriptor(
|
||||
doc="Eight-byte unsigned integer, little-endian.")
|
||||
|
||||
|
||||
def read_stringnl(f, decode=True, stripquotes=True):
|
||||
def read_stringnl(f, decode=True, stripquotes=True, *, encoding='latin-1'):
|
||||
r"""
|
||||
>>> import io
|
||||
>>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n"))
|
||||
@ -356,7 +356,7 @@ def read_stringnl(f, decode=True, stripquotes=True):
|
||||
raise ValueError("no string quotes around %r" % data)
|
||||
|
||||
if decode:
|
||||
data = codecs.escape_decode(data)[0].decode("ascii")
|
||||
data = codecs.escape_decode(data)[0].decode(encoding)
|
||||
return data
|
||||
|
||||
stringnl = ArgumentDescriptor(
|
||||
@ -370,7 +370,7 @@ stringnl = ArgumentDescriptor(
|
||||
""")
|
||||
|
||||
def read_stringnl_noescape(f):
|
||||
return read_stringnl(f, stripquotes=False)
|
||||
return read_stringnl(f, stripquotes=False, encoding='utf-8')
|
||||
|
||||
stringnl_noescape = ArgumentDescriptor(
|
||||
name='stringnl_noescape',
|
||||
@ -2509,6 +2509,9 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0):
|
||||
# make a mild effort to align arguments
|
||||
line += ' ' * (10 - len(opcode.name))
|
||||
if arg is not None:
|
||||
if opcode.name in ("STRING", "BINSTRING", "SHORT_BINSTRING"):
|
||||
line += ' ' + ascii(arg)
|
||||
else:
|
||||
line += ' ' + repr(arg)
|
||||
if markmsg:
|
||||
line += ' ' + markmsg
|
||||
|
@ -361,6 +361,88 @@ highest protocol among opcodes = 0
|
||||
highest protocol among opcodes = 0
|
||||
''', annotate=20)
|
||||
|
||||
def test_string(self):
|
||||
self.check_dis(b"S'abc'\n.", '''\
|
||||
0: S STRING 'abc'
|
||||
7: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
''')
|
||||
self.check_dis(b'S"abc"\n.', '''\
|
||||
0: S STRING 'abc'
|
||||
7: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
''')
|
||||
self.check_dis(b"S'\xc3\xb5'\n.", '''\
|
||||
0: S STRING '\\xc3\\xb5'
|
||||
6: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
''')
|
||||
|
||||
def test_string_without_quotes(self):
|
||||
self.check_dis_error(b"Sabc'\n.", '',
|
||||
'no string quotes around b"abc\'"')
|
||||
self.check_dis_error(b'Sabc"\n.', '',
|
||||
"no string quotes around b'abc\"'")
|
||||
self.check_dis_error(b"S'abc\n.", '',
|
||||
'''strinq quote b"'" not found at both ends of b"'abc"''')
|
||||
self.check_dis_error(b'S"abc\n.', '',
|
||||
r"""strinq quote b'"' not found at both ends of b'"abc'""")
|
||||
self.check_dis_error(b"S'abc\"\n.", '',
|
||||
r"""strinq quote b"'" not found at both ends of b'\\'abc"'""")
|
||||
self.check_dis_error(b"S\"abc'\n.", '',
|
||||
r"""strinq quote b'"' not found at both ends of b'"abc\\''""")
|
||||
|
||||
def test_binstring(self):
|
||||
self.check_dis(b"T\x03\x00\x00\x00abc.", '''\
|
||||
0: T BINSTRING 'abc'
|
||||
8: . STOP
|
||||
highest protocol among opcodes = 1
|
||||
''')
|
||||
self.check_dis(b"T\x02\x00\x00\x00\xc3\xb5.", '''\
|
||||
0: T BINSTRING '\\xc3\\xb5'
|
||||
7: . STOP
|
||||
highest protocol among opcodes = 1
|
||||
''')
|
||||
|
||||
def test_short_binstring(self):
|
||||
self.check_dis(b"U\x03abc.", '''\
|
||||
0: U SHORT_BINSTRING 'abc'
|
||||
5: . STOP
|
||||
highest protocol among opcodes = 1
|
||||
''')
|
||||
self.check_dis(b"U\x02\xc3\xb5.", '''\
|
||||
0: U SHORT_BINSTRING '\\xc3\\xb5'
|
||||
4: . STOP
|
||||
highest protocol among opcodes = 1
|
||||
''')
|
||||
|
||||
def test_global(self):
|
||||
self.check_dis(b"cmodule\nname\n.", '''\
|
||||
0: c GLOBAL 'module name'
|
||||
13: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
''')
|
||||
self.check_dis(b"cm\xc3\xb6dule\nn\xc3\xa4me\n.", '''\
|
||||
0: c GLOBAL 'm\xf6dule n\xe4me'
|
||||
15: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
''')
|
||||
|
||||
def test_inst(self):
|
||||
self.check_dis(b"(imodule\nname\n.", '''\
|
||||
0: ( MARK
|
||||
1: i INST 'module name' (MARK at 0)
|
||||
14: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
''')
|
||||
|
||||
def test_persid(self):
|
||||
self.check_dis(b"Pabc\n.", '''\
|
||||
0: P PERSID 'abc'
|
||||
5: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
''')
|
||||
|
||||
|
||||
class MiscTestCase(unittest.TestCase):
|
||||
def test__all__(self):
|
||||
|
@ -0,0 +1,3 @@
|
||||
Fix support of STRING and GLOBAL opcodes with non-ASCII arguments in
|
||||
:mod:`pickletools`. :func:`pickletools.dis` now outputs non-ASCII bytes in
|
||||
STRING, BINSTRING and SHORT_BINSTRING arguments as escaped (``\xXX``).
|
Loading…
Reference in New Issue
Block a user