mirror of
https://github.com/python/cpython.git
synced 2024-12-15 12:54:31 +08:00
975ac326ff
* bpo-33262: Deprecate passing None for `s` to shlex.split() This reads the string to split from standard input. * Update What's New. * Fix shlex.rst
377 lines
14 KiB
Python
377 lines
14 KiB
Python
import io
|
|
import itertools
|
|
import shlex
|
|
import string
|
|
import unittest
|
|
from unittest import mock
|
|
|
|
|
|
# The original test data set was from shellwords, by Hartmut Goebel.
|
|
|
|
# Table of tokenizer cases, one per line, written as
# ``input|token|token|...|``.  ShlexTest.setUp() splits every line on "|",
# drops the empty field after the final "|", and turns a literal
# backslash-n sequence in the input column into a real newline.
# ShlexTest.testCompat() feeds each input through a default-constructed
# shlex.shlex and compares the tokens with the remaining columns.
#
# NOTE(review): rows 2 and 3 read identically in this copy and several
# inputs contain only single spaces; leading or repeated whitespace may have
# been lost when the file was extracted -- confirm against upstream.
data = r"""x|x|
foo bar|foo|bar|
foo bar|foo|bar|
foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""
|
|
|
|
# Table of POSIX-mode tokenizer cases, one per line, written as
# ``input|token|token|...|``.  ShlexTest.setUp() splits every line on "|",
# drops the empty field after the final "|", and turns a literal
# backslash-n sequence in the input column into a real newline.
# ShlexTest.testSplitPosix() runs each input through
# shlex.split(..., comments=True) and compares the result with the
# remaining columns.
#
# NOTE(review): rows 2 and 3 read identically in this copy and several
# inputs contain only single spaces; leading or repeated whitespace may have
# been lost when the file was extracted -- confirm against upstream.
posix_data = r"""x|x|
foo bar|foo|bar|
foo bar|foo|bar|
foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
|
|
|
|
class ShlexTest(unittest.TestCase):
    """Tests for shlex tokenizing, punctuation handling, quoting and joining.

    The table-driven tests read the module-level ``data`` and ``posix_data``
    strings, which setUp() parses into ``[input, token, ...]`` rows.
    """

    def setUp(self):
        """Parse the ``data`` / ``posix_data`` tables into lists of rows.

        Every line is split on "|" and the empty field after the trailing
        "|" is dropped, giving ``[input, token, token, ...]``.  A literal
        backslash-n sequence in the input column becomes a real newline.
        """
        self.data = [x.split("|")[:-1]
                     for x in data.splitlines()]
        self.posix_data = [x.split("|")[:-1]
                           for x in posix_data.splitlines()]
        for item in self.data:
            item[0] = item[0].replace(r"\n", "\n")
        for item in self.posix_data:
            item[0] = item[0].replace(r"\n", "\n")

    def splitTest(self, data, comments):
        """Assert that shlex.split() yields the expected tokens for each row.

        data: rows shaped like ``[input, token, token, ...]``.
        comments: forwarded to shlex.split() as its ``comments`` argument.
        """
        for source, *expected in data:
            tokens = shlex.split(source, comments=comments)
            self.assertEqual(tokens, expected,
                             "%s: %s != %s" %
                             (source, tokens, expected))

    def oldSplit(self, s):
        """Tokenize *s* by calling get_token() until a falsy token appears.

        Returns the list of tokens produced by a default-constructed
        shlex.shlex reading from an io.StringIO wrapping *s*.
        """
        ret = []
        lex = shlex.shlex(io.StringIO(s))
        tok = lex.get_token()
        while tok:
            ret.append(tok)
            tok = lex.get_token()
        return ret

    # sys.stdin is replaced with an empty in-memory stream so the call below
    # never touches the real standard input.
    @mock.patch('sys.stdin', io.StringIO())
    def testSplitNoneDeprecation(self):
        """Test that shlex.split(None) emits a DeprecationWarning"""
        with self.assertWarns(DeprecationWarning):
            shlex.split(None)

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for source, *expected in self.data:
            tokens = self.oldSplit(source)
            self.assertEqual(tokens, expected,
                             "%s: %s != %s" %
                             (source, tokens, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >, < and |"""
        # these should all parse to the same output
        for delimiter in ('>', '<', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                # Apply the flag named in the failure message; without this
                # both iterations exercised the same configuration.
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        for ss, ws in itertools.product(src, (False, True)):
            s = shlex.shlex(ss, punctuation_chars=True)
            s.whitespace_split = ws
            result = list(s)
            self.assertEqual(ref, result,
                             "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        ss = "~/a&&b-c --color=auto||d *.py?"
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        s.whitespace_split = True
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        for source, expected in (
                                ('a && b || c',
                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                                  ('||', 'c'), ('c', 'a')]),
                              ):
            s = shlex.shlex(source, punctuation_chars=True)
            observed = []
            while True:
                t = s.get_token()
                if t == s.eof:
                    break
                # Classify by first character: 'c' for a punctuation run,
                # 'a' for anything else.
                if t[0] in s.punctuation_chars:
                    tt = 'c'
                else:
                    tt = 'a'
                observed.append((t, tt))
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
            slist = list(s)
            self.assertEqual(slist, expected)
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testUnicodeHandling(self):
        """Test punctuation_chars and whitespace_split handle unicode."""
        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
        # Should be parsed as one complete token (whitespace_split=True).
        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        s.whitespace_split = True
        self.assertEqual(list(s), ref)
        # Without whitespace_split, uses wordchars and splits on all.
        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        self.assertEqual(list(s), ref)

    def testQuote(self):
        """Test shlex.quote() on empty, safe and unsafe strings"""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)

    def testJoin(self):
        """Test shlex.join() against hand-written expected command strings"""
        for split_command, command in [
            (['a ', 'b'], "'a ' b"),
            (['a', ' b'], "a ' b'"),
            (['a', ' ', 'b'], "a ' ' b"),
            (['"a', 'b"'], '\'"a\' \'b"\''),
        ]:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                self.assertEqual(joined, command)

    def testJoinRoundtrip(self):
        """Test that shlex.split(shlex.join(tokens)) gives back the tokens"""
        all_data = self.data + self.posix_data
        for command, *split_command in all_data:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                resplit = shlex.split(joined)
                self.assertEqual(split_command, resplit)

    def testPunctuationCharsReadOnly(self):
        """Test that punctuation_chars cannot be reassigned on an instance"""
        punctuation_chars = "/|$%^"
        shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
        self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
        with self.assertRaises(AttributeError):
            shlex_instance.punctuation_chars = False
|
|
|
|
|
|
# Keep this module usable against an old shlex.py: when shlex has no usable
# ``split`` attribute, strip every test except testCompat from ShlexTest.
if not getattr(shlex, "split", None):
    for methname in [m for m in dir(ShlexTest) if m.startswith("test")]:
        if methname == "testCompat":
            continue
        delattr(ShlexTest, methname)

# Run the tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|