mirror of
https://github.com/python/cpython.git
synced 2025-01-22 08:25:42 +08:00
Fix Issue754016 - urlparse goes wrong with IP:port without scheme
This commit is contained in:
parent
4aa0d4d2d0
commit
84c7d9f87b
@ -48,6 +48,23 @@ The :mod:`urllib.parse` module defines the following functions:
|
||||
>>> o.geturl()
|
||||
'http://www.cwi.nl:80/%7Eguido/Python.html'
|
||||
|
||||
If the scheme value is not specified, urlparse, following the syntax
|
||||
specifications from RFC 1808, expects the netloc value to start with '//'.
|
||||
Otherwise, it is not possible to distinguish between the netloc and path
|
||||
components, so urlparse classifies the ambiguous component as a path, as in
|
||||
a relative URL.
|
||||
|
||||
>>> from urllib.parse import urlparse
|
||||
>>> urlparse('//www.cwi.nl:80/%7Eguido/Python.html')
|
||||
ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
|
||||
params='', query='', fragment='')
|
||||
>>> urlparse('www.cwi.nl:80/%7Eguido/Python.html')
|
||||
ParseResult(scheme='', netloc='', path='www.cwi.nl:80/%7Eguido/Python.html',
|
||||
params='', query='', fragment='')
|
||||
>>> urlparse('help/Python.html')
|
||||
ParseResult(scheme='', netloc='', path='help/Python.html', params='',
|
||||
query='', fragment='')
|
||||
|
||||
If the *scheme* argument is specified, it gives the default addressing
|
||||
scheme, to be used only if the URL does not specify one. The default value for
|
||||
this argument is the empty string.
|
||||
|
@ -461,6 +461,27 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
|
||||
('http', 'example.com', '', '', 'blahblah=/foo', ''))
|
||||
|
||||
def test_withoutscheme(self):
|
||||
# Test urlparse without scheme
|
||||
# Issue 754016: urlparse goes wrong with IP:port without scheme
|
||||
# RFC 1808 specifies that netloc should start with '//'; urlparse expects
|
||||
# the same, and otherwise classifies that portion of the URL as path.
|
||||
self.assertEqual(urllib.parse.urlparse("path"),
|
||||
('','','path','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
|
||||
('','www.python.org:80','','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
||||
('http','www.python.org:80','','','',''))
|
||||
|
||||
def test_portseparator(self):
|
||||
# Issue 754016: distinguish the port separator ':' from the scheme separator
|
||||
self.assertEqual(urllib.parse.urlparse("path:80"),
|
||||
('','','path:80','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
||||
('http','www.python.org:80','','','',''))
|
||||
|
||||
def test_usingsys(self):
|
||||
# Issue 3314: sys module is used in the error
|
||||
self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
|
||||
|
@ -192,11 +192,12 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
||||
v = SplitResult(scheme, netloc, url, query, fragment)
|
||||
_parse_cache[key] = v
|
||||
return v
|
||||
for c in url[:i]:
|
||||
if c not in scheme_chars:
|
||||
break
|
||||
else:
|
||||
scheme, url = url[:i].lower(), url[i+1:]
|
||||
if url.endswith(':') or not url[i+1].isdigit():
|
||||
for c in url[:i]:
|
||||
if c not in scheme_chars:
|
||||
break
|
||||
else:
|
||||
scheme, url = url[:i].lower(), url[i+1:]
|
||||
if url[:2] == '//':
|
||||
netloc, url = _splitnetloc(url, 2)
|
||||
if (('[' in netloc and ']' not in netloc) or
|
||||
|
Loading…
Reference in New Issue
Block a user