mirror of
https://github.com/python/cpython.git
synced 2024-11-25 10:54:51 +08:00
Fix quopri to operate consistently on bytes.
This commit is contained in:
parent
f3f0c611dd
commit
c582bfca26
@ -6,10 +6,10 @@
|
||||
|
||||
__all__ = ["encode", "decode", "encodestring", "decodestring"]
|
||||
|
||||
ESCAPE = '='
|
||||
ESCAPE = b'='
|
||||
MAXLINESIZE = 76
|
||||
HEX = '0123456789ABCDEF'
|
||||
EMPTYSTRING = ''
|
||||
HEX = b'0123456789ABCDEF'
|
||||
EMPTYSTRING = b''
|
||||
|
||||
try:
|
||||
from binascii import a2b_qp, b2a_qp
|
||||
@ -19,23 +19,25 @@ except ImportError:
|
||||
|
||||
|
||||
def needsquoting(c, quotetabs, header):
    """Decide whether a particular byte needs to be quoted.

    'c' must be a bytes object (the encoder passes one byte at a time, e.g.
    b'a'), not an integer ordinal.

    The 'quotetabs' flag indicates whether embedded tabs and spaces should be
    quoted.  Note that line-ending tabs and spaces are always encoded, as per
    RFC 1521.  The 'header' flag indicates whether an underscore must be
    quoted.

    Returns 'quotetabs' itself for a space or tab, 'header' itself for an
    underscore, and otherwise True when 'c' is the escape byte or lies
    outside the range b' ' .. b'~'.
    """
    assert isinstance(c, bytes)
    if c in b' \t':
        return quotetabs
    # if header, we have to escape _ because _ is used to escape space
    if c == b'_':
        return header
    return c == ESCAPE or not (b' ' <= c <= b'~')
|
||||
|
||||
def quote(c):
    """Quote a single byte.

    'c' must be a bytes object of length 1.  Returns a bytes object made of
    ESCAPE followed by the two digits, looked up in HEX, of the byte's value.
    """
    assert isinstance(c, bytes) and len(c)==1
    high, low = divmod(ord(c), 16)
    # Indexing the bytes constant HEX yields integers, so wrap the two digits
    # back into a bytes object before concatenating.
    return ESCAPE + bytes((HEX[high], HEX[low]))
|
||||
|
||||
|
||||
|
||||
@ -56,12 +58,12 @@ def encode(input, output, quotetabs, header = 0):
|
||||
output.write(odata)
|
||||
return
|
||||
|
||||
def write(s, output=output, lineEnd='\n'):
|
||||
def write(s, output=output, lineEnd=b'\n'):
|
||||
# RFC 1521 requires that the line ending in a space or tab must have
|
||||
# that trailing character encoded.
|
||||
if s and s[-1:] in ' \t':
|
||||
output.write(s[:-1] + quote(s[-1]) + lineEnd)
|
||||
elif s == '.':
|
||||
if s and s[-1:] in b' \t':
|
||||
output.write(s[:-1] + quote(s[-1:]) + lineEnd)
|
||||
elif s == b'.':
|
||||
output.write(quote(s) + lineEnd)
|
||||
else:
|
||||
output.write(s + lineEnd)
|
||||
@ -73,16 +75,17 @@ def encode(input, output, quotetabs, header = 0):
|
||||
break
|
||||
outline = []
|
||||
# Strip off any readline induced trailing newline
|
||||
stripped = ''
|
||||
if line[-1:] == '\n':
|
||||
stripped = b''
|
||||
if line[-1:] == b'\n':
|
||||
line = line[:-1]
|
||||
stripped = '\n'
|
||||
stripped = b'\n'
|
||||
# Calculate the un-length-limited encoded line
|
||||
for c in line:
|
||||
c = bytes((c,))
|
||||
if needsquoting(c, quotetabs, header):
|
||||
c = quote(c)
|
||||
if header and c == ' ':
|
||||
outline.append('_')
|
||||
if header and c == b' ':
|
||||
outline.append(b'_')
|
||||
else:
|
||||
outline.append(c)
|
||||
# First, write out the previous line
|
||||
@ -94,7 +97,7 @@ def encode(input, output, quotetabs, header = 0):
|
||||
while len(thisline) > MAXLINESIZE:
|
||||
# Don't forget to include the soft line break `=' sign in the
|
||||
# length calculation!
|
||||
write(thisline[:MAXLINESIZE-1], lineEnd='=\n')
|
||||
write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n')
|
||||
thisline = thisline[MAXLINESIZE-1:]
|
||||
# Write out the current line
|
||||
prevline = thisline
|
||||
@ -105,9 +108,9 @@ def encode(input, output, quotetabs, header = 0):
|
||||
def encodestring(s, quotetabs = 0, header = 0):
    """Return the quoted-printable encoding of the bytes object 's'.

    'quotetabs' and 'header' are passed through unchanged to the encoder.
    When b2a_qp is available (not None) it does the work directly; otherwise
    the data is run through encode() using in-memory binary streams.
    """
    if b2a_qp is not None:
        return b2a_qp(s, quotetabs = quotetabs, header = header)
    # The fallback must use binary buffers: encode() reads and writes bytes,
    # and a text buffer cannot be constructed from a bytes object.
    from io import BytesIO
    infp = BytesIO(s)
    outfp = BytesIO()
    encode(infp, outfp, quotetabs, header)
    return outfp.getvalue()
|
||||
|
||||
@ -124,44 +127,44 @@ def decode(input, output, header = 0):
|
||||
output.write(odata)
|
||||
return
|
||||
|
||||
new = ''
|
||||
new = b''
|
||||
while 1:
|
||||
line = input.readline()
|
||||
if not line: break
|
||||
i, n = 0, len(line)
|
||||
if n > 0 and line[n-1] == '\n':
|
||||
if n > 0 and line[n-1:n] == b'\n':
|
||||
partial = 0; n = n-1
|
||||
# Strip trailing whitespace
|
||||
while n > 0 and line[n-1] in " \t\r":
|
||||
while n > 0 and line[n-1:n] in b" \t\r":
|
||||
n = n-1
|
||||
else:
|
||||
partial = 1
|
||||
while i < n:
|
||||
c = line[i]
|
||||
if c == '_' and header:
|
||||
new = new + ' '; i = i+1
|
||||
c = line[i:i+1]
|
||||
if c == b'_' and header:
|
||||
new = new + b' '; i = i+1
|
||||
elif c != ESCAPE:
|
||||
new = new + c; i = i+1
|
||||
elif i+1 == n and not partial:
|
||||
partial = 1; break
|
||||
elif i+1 < n and line[i+1] == ESCAPE:
|
||||
new = new + ESCAPE; i = i+2
|
||||
elif i+2 < n and ishex(line[i+1]) and ishex(line[i+2]):
|
||||
new = new + chr(unhex(line[i+1:i+3])); i = i+3
|
||||
elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
|
||||
new = new + bytes((unhex(line[i+1:i+3]),)); i = i+3
|
||||
else: # Bad escape sequence -- leave it in
|
||||
new = new + c; i = i+1
|
||||
if not partial:
|
||||
output.write(new + '\n')
|
||||
new = ''
|
||||
output.write(new + b'\n')
|
||||
new = b''
|
||||
if new:
|
||||
output.write(new)
|
||||
|
||||
def decodestring(s, header = 0):
    """Return the decoded form of the quoted-printable bytes object 's'.

    'header' is passed through unchanged to the decoder.  When a2b_qp is
    available (not None) it does the work directly; otherwise the data is
    run through decode() using in-memory binary streams.
    """
    if a2b_qp is not None:
        return a2b_qp(s, header = header)
    # The fallback must use binary buffers: decode() reads and writes bytes,
    # and a text buffer cannot be constructed from a bytes object.
    from io import BytesIO
    infp = BytesIO(s)
    outfp = BytesIO()
    decode(infp, outfp, header = header)
    return outfp.getvalue()
|
||||
|
||||
@ -169,21 +172,23 @@ def decodestring(s, header = 0):
|
||||
|
||||
# Other helper functions
|
||||
def ishex(c):
    """Return true if the byte 'c' is a hexadecimal digit in ASCII.

    'c' must be a bytes object (not an integer ordinal).  Anything that is
    not exactly one byte long, including the empty bytes object, is not a
    hexadecimal digit.
    """
    assert isinstance(c, bytes)
    # Range comparisons on bytes are lexicographic, so a longer value such as
    # b'1z' would satisfy b'0' <= c <= b'9'; insist on a single byte first.
    if len(c) != 1:
        return False
    return b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F'
|
||||
|
||||
def unhex(s):
    """Get the integer value of a hexadecimal number.

    's' is a bytes object of ASCII hexadecimal digits in either case; an
    empty 's' yields 0.  Raises AssertionError on the first byte that is not
    a hexadecimal digit.
    """
    bits = 0
    for code in s:
        # Iterating over bytes yields integer ordinals.
        if ord(b'0') <= code <= ord(b'9'):
            digit = code - ord(b'0')
        elif ord(b'a') <= code <= ord(b'f'):
            digit = code - ord(b'a') + 10
        elif ord(b'A') <= code <= ord(b'F'):
            digit = code - ord(b'A') + 10
        else:
            # Raised explicitly rather than via an assert statement so the
            # check is not stripped under -O, where falling through would
            # silently compute a wrong value.
            raise AssertionError("non-hex digit " + repr(bytes((code,))))
        bits = bits * 16 + digit
    return bits
|
||||
|
||||
@ -214,18 +219,18 @@ def main():
|
||||
sts = 0
|
||||
for file in args:
|
||||
if file == '-':
|
||||
fp = sys.stdin
|
||||
fp = sys.stdin.buffer
|
||||
else:
|
||||
try:
|
||||
fp = open(file)
|
||||
fp = open(file, "rb")
|
||||
except IOError as msg:
|
||||
sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
|
||||
sts = 1
|
||||
continue
|
||||
if deco:
|
||||
decode(fp, sys.stdout)
|
||||
decode(fp, sys.stdout.buffer)
|
||||
else:
|
||||
encode(fp, sys.stdout, tabs)
|
||||
encode(fp, sys.stdout.buffer, tabs)
|
||||
if fp is not sys.stdin:
|
||||
fp.close()
|
||||
if sts:
|
||||
|
@ -6,7 +6,7 @@ import quopri
|
||||
|
||||
|
||||
|
||||
ENCSAMPLE = """\
|
||||
ENCSAMPLE = b"""\
|
||||
Here's a bunch of special=20
|
||||
|
||||
=A1=A2=A3=A4=A5=A6=A7=A8=A9
|
||||
@ -25,8 +25,8 @@ characters... have fun!
|
||||
"""
|
||||
|
||||
# First line ends with a space
|
||||
DECSAMPLE = "Here's a bunch of special \n" + \
|
||||
"""\
|
||||
DECSAMPLE = b"Here's a bunch of special \n" + \
|
||||
b"""\
|
||||
|
||||
\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9
|
||||
\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3
|
||||
@ -67,48 +67,48 @@ class QuopriTestCase(unittest.TestCase):
|
||||
# used in the "quotetabs=0" tests.
|
||||
STRINGS = (
|
||||
# Some normal strings
|
||||
('hello', 'hello'),
|
||||
('''hello
|
||||
(b'hello', b'hello'),
|
||||
(b'''hello
|
||||
there
|
||||
world''', '''hello
|
||||
world''', b'''hello
|
||||
there
|
||||
world'''),
|
||||
('''hello
|
||||
(b'''hello
|
||||
there
|
||||
world
|
||||
''', '''hello
|
||||
''', b'''hello
|
||||
there
|
||||
world
|
||||
'''),
|
||||
('\201\202\203', '=81=82=83'),
|
||||
(b'\201\202\203', b'=81=82=83'),
|
||||
# Add some trailing MUST QUOTE strings
|
||||
('hello ', 'hello=20'),
|
||||
('hello\t', 'hello=09'),
|
||||
(b'hello ', b'hello=20'),
|
||||
(b'hello\t', b'hello=09'),
|
||||
# Some long lines. First, a single line of 108 characters
|
||||
('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
|
||||
'''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
|
||||
(b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
|
||||
b'''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
|
||||
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''),
|
||||
# A line of exactly 76 characters, no soft line break should be needed
|
||||
('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
|
||||
'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),
|
||||
(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
|
||||
b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),
|
||||
# A line of 77 characters, forcing a soft line break at position 75,
|
||||
# and a second line of exactly 2 characters (because the soft line
|
||||
# break `=' sign counts against the line length limit).
|
||||
('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
|
||||
'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
|
||||
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
|
||||
b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
|
||||
zz'''),
|
||||
# A line of 151 characters, forcing a soft line break at position 75,
|
||||
# with a second line of exactly 76 characters and no trailing =
|
||||
('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
|
||||
'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
|
||||
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
|
||||
b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
|
||||
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),
|
||||
# A string containing a hard line break, but which the first line is
|
||||
# 151 characters and the second line is exactly 76 characters. This
|
||||
# should leave us with three lines, the first which has a soft line
|
||||
# break, and which the second and third do not.
|
||||
('''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
|
||||
(b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
|
||||
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''',
|
||||
'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
|
||||
b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
|
||||
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
|
||||
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),
|
||||
# Now some really complex stuff ;)
|
||||
@ -117,14 +117,14 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
|
||||
|
||||
# These are used in the "quotetabs=1" tests.
|
||||
ESTRINGS = (
|
||||
('hello world', 'hello=20world'),
|
||||
('hello\tworld', 'hello=09world'),
|
||||
(b'hello world', b'hello=20world'),
|
||||
(b'hello\tworld', b'hello=09world'),
|
||||
)
|
||||
|
||||
# These are used in the "header=1" tests.
|
||||
HSTRINGS = (
|
||||
('hello world', 'hello_world'),
|
||||
('hello_world', 'hello=5Fworld'),
|
||||
(b'hello world', b'hello_world'),
|
||||
(b'hello_world', b'hello=5Fworld'),
|
||||
)
|
||||
|
||||
@withpythonimplementation
|
||||
@ -161,18 +161,18 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
|
||||
@withpythonimplementation
|
||||
def test_embedded_ws(self):
|
||||
for p, e in self.ESTRINGS:
|
||||
self.assert_(quopri.encodestring(p, quotetabs=True) == e)
|
||||
self.assertEqual(quopri.encodestring(p, quotetabs=True), e)
|
||||
self.assertEqual(quopri.decodestring(e), p)
|
||||
|
||||
@withpythonimplementation
|
||||
def test_encode_header(self):
|
||||
for p, e in self.HSTRINGS:
|
||||
self.assert_(quopri.encodestring(p, header=True) == e)
|
||||
self.assertEqual(quopri.encodestring(p, header=True), e)
|
||||
|
||||
@withpythonimplementation
|
||||
def test_decode_header(self):
|
||||
for p, e in self.HSTRINGS:
|
||||
self.assert_(quopri.decodestring(e, header=True) == p)
|
||||
self.assertEqual(quopri.decodestring(e, header=True), p)
|
||||
|
||||
def test_scriptencode(self):
|
||||
(p, e) = self.STRINGS[-1]
|
||||
@ -182,13 +182,20 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
|
||||
# On Windows, Python will output the result to stdout using
|
||||
# CRLF, as the mode of stdout is text mode. To compare this
|
||||
# with the expected result, we need to do a line-by-line comparison.
|
||||
self.assertEqual(cout.splitlines(), e.splitlines())
|
||||
cout = cout.decode('latin-1').splitlines()
|
||||
e = e.decode('latin-1').splitlines()
|
||||
assert len(cout)==len(e)
|
||||
for i in range(len(cout)):
|
||||
self.assertEqual(cout[i], e[i])
|
||||
self.assertEqual(cout, e)
|
||||
|
||||
def test_scriptdecode(self):
|
||||
(p, e) = self.STRINGS[-1]
|
||||
process = subprocess.Popen([sys.executable, "-mquopri", "-d"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
cout, cerr = process.communicate(e)
|
||||
cout = cout.decode('latin-1')
|
||||
p = p.decode('latin-1')
|
||||
self.assertEqual(cout.splitlines(), p.splitlines())
|
||||
|
||||
def test_main():
|
||||
|
Loading…
Reference in New Issue
Block a user