mirror of
https://github.com/python/cpython.git
synced 2024-11-26 19:34:19 +08:00
GH-84850: Remove urllib.request.URLopener
and FancyURLopener
(#125739)
This commit is contained in:
parent
a99dd23c1f
commit
4d771977b1
@ -145,10 +145,6 @@ although there is currently no date scheduled for their removal.
|
||||
* ``splitvalue()``
|
||||
* ``to_bytes()``
|
||||
|
||||
* :mod:`urllib.request`: :class:`~urllib.request.URLopener` and
|
||||
:class:`~urllib.request.FancyURLopener` style of invoking requests is
|
||||
deprecated. Use newer :func:`~urllib.request.urlopen` functions and methods.
|
||||
|
||||
* :mod:`wsgiref`: ``SimpleHandler.stdout.write()`` should not do partial
|
||||
writes.
|
||||
|
||||
|
@ -67,8 +67,7 @@ The :mod:`urllib.request` module defines the following functions:
|
||||
the response headers as it is specified in the documentation for
|
||||
:class:`~http.client.HTTPResponse`.
|
||||
|
||||
For FTP, file, and data URLs and requests explicitly handled by legacy
|
||||
:class:`URLopener` and :class:`FancyURLopener` classes, this function
|
||||
For FTP, file, and data URLs, this function
|
||||
returns a :class:`urllib.response.addinfourl` object.
|
||||
|
||||
Raises :exc:`~urllib.error.URLError` on protocol errors.
|
||||
@ -1339,7 +1338,7 @@ environment settings::
|
||||
|
||||
>>> import urllib.request
|
||||
>>> proxies = {'http': 'http://proxy.example.com:8080/'}
|
||||
>>> opener = urllib.request.FancyURLopener(proxies)
|
||||
>>> opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies))
|
||||
>>> with opener.open("http://www.python.org") as f:
|
||||
... f.read().decode('utf-8')
|
||||
...
|
||||
@ -1347,7 +1346,7 @@ environment settings::
|
||||
The following example uses no proxies at all, overriding environment settings::
|
||||
|
||||
>>> import urllib.request
|
||||
>>> opener = urllib.request.FancyURLopener({})
|
||||
>>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
|
||||
>>> with opener.open("http://www.python.org/") as f:
|
||||
... f.read().decode('utf-8')
|
||||
...
|
||||
@ -1412,121 +1411,6 @@ some point in the future.
|
||||
Cleans up temporary files that may have been left behind by previous
|
||||
calls to :func:`urlretrieve`.
|
||||
|
||||
.. class:: URLopener(proxies=None, **x509)
|
||||
|
||||
.. deprecated:: 3.3
|
||||
|
||||
Base class for opening and reading URLs. Unless you need to support opening
|
||||
objects using schemes other than :file:`http:`, :file:`ftp:`, or :file:`file:`,
|
||||
you probably want to use :class:`FancyURLopener`.
|
||||
|
||||
By default, the :class:`URLopener` class sends a :mailheader:`User-Agent` header
|
||||
of ``urllib/VVV``, where *VVV* is the :mod:`urllib` version number.
|
||||
Applications can define their own :mailheader:`User-Agent` header by subclassing
|
||||
:class:`URLopener` or :class:`FancyURLopener` and setting the class attribute
|
||||
:attr:`version` to an appropriate string value in the subclass definition.
|
||||
|
||||
The optional *proxies* parameter should be a dictionary mapping scheme names to
|
||||
proxy URLs, where an empty dictionary turns proxies off completely. Its default
|
||||
value is ``None``, in which case environmental proxy settings will be used if
|
||||
present, as discussed in the definition of :func:`urlopen`, above.
|
||||
|
||||
Additional keyword parameters, collected in *x509*, may be used for
|
||||
authentication of the client when using the :file:`https:` scheme. The keywords
|
||||
*key_file* and *cert_file* are supported to provide an SSL key and certificate;
|
||||
both are needed to support client authentication.
|
||||
|
||||
:class:`URLopener` objects will raise an :exc:`OSError` exception if the server
|
||||
returns an error code.
|
||||
|
||||
.. method:: open(fullurl, data=None)
|
||||
|
||||
Open *fullurl* using the appropriate protocol. This method sets up cache and
|
||||
proxy information, then calls the appropriate open method with its input
|
||||
arguments. If the scheme is not recognized, :meth:`open_unknown` is called.
|
||||
The *data* argument has the same meaning as the *data* argument of
|
||||
:func:`urlopen`.
|
||||
|
||||
This method always quotes *fullurl* using :func:`~urllib.parse.quote`.
|
||||
|
||||
.. method:: open_unknown(fullurl, data=None)
|
||||
|
||||
Overridable interface to open unknown URL types.
|
||||
|
||||
|
||||
.. method:: retrieve(url, filename=None, reporthook=None, data=None)
|
||||
|
||||
Retrieves the contents of *url* and places it in *filename*. The return value
|
||||
is a tuple consisting of a local filename and either an
|
||||
:class:`email.message.Message` object containing the response headers (for remote
|
||||
URLs) or ``None`` (for local URLs). The caller must then open and read the
|
||||
contents of *filename*. If *filename* is not given and the URL refers to a
|
||||
local file, the input filename is returned. If the URL is non-local and
|
||||
*filename* is not given, the filename is the output of :func:`tempfile.mktemp`
|
||||
with a suffix that matches the suffix of the last path component of the input
|
||||
URL. If *reporthook* is given, it must be a function accepting three numeric
|
||||
parameters: A chunk number, the maximum size chunks are read in and the total size of the download
|
||||
(-1 if unknown). It will be called once at the start and after each chunk of data is read from the
|
||||
network. *reporthook* is ignored for local URLs.
|
||||
|
||||
If the *url* uses the :file:`http:` scheme identifier, the optional *data*
|
||||
argument may be given to specify a ``POST`` request (normally the request type
|
||||
is ``GET``). The *data* argument must be in standard
|
||||
:mimetype:`application/x-www-form-urlencoded` format; see the
|
||||
:func:`urllib.parse.urlencode` function.
|
||||
|
||||
|
||||
.. attribute:: version
|
||||
|
||||
Variable that specifies the user agent of the opener object. To get
|
||||
:mod:`urllib` to tell servers that it is a particular user agent, set this in a
|
||||
subclass as a class variable or in the constructor before calling the base
|
||||
constructor.
|
||||
|
||||
|
||||
.. class:: FancyURLopener(...)
|
||||
|
||||
.. deprecated:: 3.3
|
||||
|
||||
:class:`FancyURLopener` subclasses :class:`URLopener` providing default handling
|
||||
for the following HTTP response codes: 301, 302, 303, 307 and 401. For the 30x
|
||||
response codes listed above, the :mailheader:`Location` header is used to fetch
|
||||
the actual URL. For 401 response codes (authentication required), basic HTTP
|
||||
authentication is performed. For the 30x response codes, recursion is bounded
|
||||
by the value of the *maxtries* attribute, which defaults to 10.
|
||||
|
||||
For all other response codes, the method :meth:`~BaseHandler.http_error_default` is called
|
||||
which you can override in subclasses to handle the error appropriately.
|
||||
|
||||
.. note::
|
||||
|
||||
According to the letter of :rfc:`2616`, 301 and 302 responses to POST requests
|
||||
must not be automatically redirected without confirmation by the user. In
|
||||
reality, browsers do allow automatic redirection of these responses, changing
|
||||
the POST to a GET, and :mod:`urllib` reproduces this behaviour.
|
||||
|
||||
The parameters to the constructor are the same as those for :class:`URLopener`.
|
||||
|
||||
.. note::
|
||||
|
||||
When performing basic authentication, a :class:`FancyURLopener` instance calls
|
||||
its :meth:`prompt_user_passwd` method. The default implementation asks the
|
||||
users for the required information on the controlling terminal. A subclass may
|
||||
override this method to support more appropriate behavior if needed.
|
||||
|
||||
The :class:`FancyURLopener` class offers one additional method that should be
|
||||
overloaded to provide the appropriate behavior:
|
||||
|
||||
.. method:: prompt_user_passwd(host, realm)
|
||||
|
||||
Return information needed to authenticate the user at the given host in the
|
||||
specified security realm. The return value should be a tuple, ``(user,
|
||||
password)``, which can be used for basic authentication.
|
||||
|
||||
The implementation prompts for this information on the terminal; an application
|
||||
should override this method to use an appropriate interaction model in the local
|
||||
environment.
|
||||
|
||||
|
||||
:mod:`urllib.request` Restrictions
|
||||
----------------------------------
|
||||
@ -1578,8 +1462,7 @@ some point in the future.
|
||||
you try to fetch a file whose read permissions make it inaccessible; the FTP
|
||||
code will try to read it, fail with a 550 error, and then perform a directory
|
||||
listing for the unreadable file. If fine-grained control is needed, consider
|
||||
using the :mod:`ftplib` module, subclassing :class:`FancyURLopener`, or changing
|
||||
*_urlopener* to meet your needs.
|
||||
using the :mod:`ftplib` module.
|
||||
|
||||
|
||||
|
||||
|
@ -769,6 +769,10 @@ urllib
|
||||
* Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`.
|
||||
It had previously raised a :exc:`DeprecationWarning` since Python 3.11.
|
||||
(Contributed by Nikita Sobolev in :gh:`118827`.)
|
||||
* Remove deprecated :class:`!URLopener` and :class:`!FancyURLopener` classes
|
||||
from :mod:`urllib.request`. They had previously raised a
|
||||
:exc:`DeprecationWarning` since Python 3.3.
|
||||
(Contributed by Barney Gale in :gh:`84850`.)
|
||||
|
||||
Others
|
||||
------
|
||||
|
@ -7,11 +7,9 @@ import http.client
|
||||
import email.message
|
||||
import io
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
from test import support
|
||||
from test.support import os_helper
|
||||
from test.support import socket_helper
|
||||
from test.support import warnings_helper
|
||||
import os
|
||||
try:
|
||||
import ssl
|
||||
@ -20,7 +18,6 @@ except ImportError:
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from base64 import b64encode
|
||||
import collections
|
||||
|
||||
|
||||
@ -35,32 +32,6 @@ def hexescape(char):
|
||||
hex_repr = "0%s" % hex_repr
|
||||
return "%" + hex_repr
|
||||
|
||||
# Shortcut for testing FancyURLopener
|
||||
_urlopener = None
|
||||
|
||||
|
||||
def urlopen(url, data=None, proxies=None):
|
||||
"""urlopen(url [, data]) -> open file-like object"""
|
||||
global _urlopener
|
||||
if proxies is not None:
|
||||
opener = urllib.request.FancyURLopener(proxies=proxies)
|
||||
elif not _urlopener:
|
||||
opener = FancyURLopener()
|
||||
_urlopener = opener
|
||||
else:
|
||||
opener = _urlopener
|
||||
if data is None:
|
||||
return opener.open(url)
|
||||
else:
|
||||
return opener.open(url, data)
|
||||
|
||||
|
||||
def FancyURLopener():
|
||||
with warnings_helper.check_warnings(
|
||||
('FancyURLopener style of invoking requests is deprecated.',
|
||||
DeprecationWarning)):
|
||||
return urllib.request.FancyURLopener()
|
||||
|
||||
|
||||
def fakehttp(fakedata, mock_close=False):
|
||||
class FakeSocket(io.BytesIO):
|
||||
@ -119,26 +90,6 @@ class FakeHTTPMixin(object):
|
||||
http.client.HTTPConnection = self._connection_class
|
||||
|
||||
|
||||
class FakeFTPMixin(object):
|
||||
def fakeftp(self):
|
||||
class FakeFtpWrapper(object):
|
||||
def __init__(self, user, passwd, host, port, dirs, timeout=None,
|
||||
persistent=True):
|
||||
pass
|
||||
|
||||
def retrfile(self, file, type):
|
||||
return io.BytesIO(), 0
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
self._ftpwrapper_class = urllib.request.ftpwrapper
|
||||
urllib.request.ftpwrapper = FakeFtpWrapper
|
||||
|
||||
def unfakeftp(self):
|
||||
urllib.request.ftpwrapper = self._ftpwrapper_class
|
||||
|
||||
|
||||
class urlopen_FileTests(unittest.TestCase):
|
||||
"""Test urlopen() opening a temporary file.
|
||||
|
||||
@ -158,7 +109,7 @@ class urlopen_FileTests(unittest.TestCase):
|
||||
f.close()
|
||||
self.pathname = os_helper.TESTFN
|
||||
self.quoted_pathname = urllib.parse.quote(self.pathname)
|
||||
self.returned_obj = urlopen("file:%s" % self.quoted_pathname)
|
||||
self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname)
|
||||
|
||||
def tearDown(self):
|
||||
"""Shut down the open object"""
|
||||
@ -205,7 +156,7 @@ class urlopen_FileTests(unittest.TestCase):
|
||||
self.assertIsInstance(self.returned_obj.headers, email.message.Message)
|
||||
|
||||
def test_url(self):
|
||||
self.assertEqual(self.returned_obj.url, self.quoted_pathname)
|
||||
self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname)
|
||||
|
||||
def test_status(self):
|
||||
self.assertIsNone(self.returned_obj.status)
|
||||
@ -214,7 +165,7 @@ class urlopen_FileTests(unittest.TestCase):
|
||||
self.assertIsInstance(self.returned_obj.info(), email.message.Message)
|
||||
|
||||
def test_geturl(self):
|
||||
self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)
|
||||
self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname)
|
||||
|
||||
def test_getcode(self):
|
||||
self.assertIsNone(self.returned_obj.getcode())
|
||||
@ -339,13 +290,13 @@ class ProxyTests_withOrderedEnv(unittest.TestCase):
|
||||
self.assertEqual('http://somewhere:3128', proxies['http'])
|
||||
|
||||
|
||||
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
|
||||
"""Test urlopen() opening a fake http connection."""
|
||||
|
||||
def check_read(self, ver):
|
||||
self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
|
||||
try:
|
||||
fp = urlopen("http://python.org/")
|
||||
fp = urllib.request.urlopen("http://python.org/")
|
||||
self.assertEqual(fp.readline(), b"Hello!")
|
||||
self.assertEqual(fp.readline(), b"")
|
||||
self.assertEqual(fp.geturl(), 'http://python.org/')
|
||||
@ -366,8 +317,8 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||
def test_willclose(self):
|
||||
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
|
||||
try:
|
||||
resp = urlopen("http://www.python.org")
|
||||
self.assertTrue(resp.fp.will_close)
|
||||
resp = urllib.request.urlopen("http://www.python.org")
|
||||
self.assertTrue(resp.will_close)
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
@ -392,9 +343,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||
with self.assertRaisesRegex(
|
||||
InvalidURL, f"contain control.*{escaped_char_repr}"):
|
||||
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||
# This code path quotes the URL so there is no injection.
|
||||
resp = urlopen(f"http:{schemeless_url}")
|
||||
self.assertNotIn(char, resp.geturl())
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
@ -416,11 +364,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||
urllib.request.urlopen(f"http:{schemeless_url}")
|
||||
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
|
||||
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||
# This code path quotes the URL so there is no injection.
|
||||
resp = urlopen(f"http:{schemeless_url}")
|
||||
self.assertNotIn(' ', resp.geturl())
|
||||
self.assertNotIn('\r', resp.geturl())
|
||||
self.assertNotIn('\n', resp.geturl())
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
@ -435,9 +378,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||
InvalidURL = http.client.InvalidURL
|
||||
with self.assertRaisesRegex(
|
||||
InvalidURL, f"contain control.*{escaped_char_repr}"):
|
||||
urlopen(f"http:{schemeless_url}")
|
||||
urllib.request.urlopen(f"http:{schemeless_url}")
|
||||
with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
|
||||
urlopen(f"https:{schemeless_url}")
|
||||
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
@ -450,9 +393,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
|
||||
InvalidURL = http.client.InvalidURL
|
||||
with self.assertRaisesRegex(
|
||||
InvalidURL, r"contain control.*\\r"):
|
||||
urlopen(f"http:{schemeless_url}")
|
||||
urllib.request.urlopen(f"http:{schemeless_url}")
|
||||
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
|
||||
urlopen(f"https:{schemeless_url}")
|
||||
urllib.request.urlopen(f"https:{schemeless_url}")
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
@ -476,7 +419,7 @@ Connection: close
|
||||
Content-Type: text/html; charset=iso-8859-1
|
||||
''', mock_close=True)
|
||||
try:
|
||||
self.assertRaises(OSError, urlopen, "http://python.org/")
|
||||
self.assertRaises(OSError, urllib.request.urlopen, "http://python.org/")
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
@ -492,20 +435,20 @@ Content-Type: text/html; charset=iso-8859-1
|
||||
try:
|
||||
msg = "Redirection to url 'file:"
|
||||
with self.assertRaisesRegex(urllib.error.HTTPError, msg):
|
||||
urlopen("http://python.org/")
|
||||
urllib.request.urlopen("http://python.org/")
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
def test_redirect_limit_independent(self):
|
||||
# Ticket #12923: make sure independent requests each use their
|
||||
# own retry limit.
|
||||
for i in range(FancyURLopener().maxtries):
|
||||
for i in range(urllib.request.HTTPRedirectHandler.max_redirections):
|
||||
self.fakehttp(b'''HTTP/1.1 302 Found
|
||||
Location: file://guidocomputer.athome.com:/python/license
|
||||
Connection: close
|
||||
''', mock_close=True)
|
||||
try:
|
||||
self.assertRaises(urllib.error.HTTPError, urlopen,
|
||||
self.assertRaises(urllib.error.HTTPError, urllib.request.urlopen,
|
||||
"http://something")
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
@ -515,14 +458,14 @@ Connection: close
|
||||
# data. (#1680230)
|
||||
self.fakehttp(b'')
|
||||
try:
|
||||
self.assertRaises(OSError, urlopen, "http://something")
|
||||
self.assertRaises(OSError, urllib.request.urlopen, "http://something")
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
def test_missing_localfile(self):
|
||||
# Test for #10836
|
||||
with self.assertRaises(urllib.error.URLError) as e:
|
||||
urlopen('file://localhost/a/file/which/doesnot/exists.py')
|
||||
urllib.request.urlopen('file://localhost/a/file/which/doesnot/exists.py')
|
||||
self.assertTrue(e.exception.filename)
|
||||
self.assertTrue(e.exception.reason)
|
||||
|
||||
@ -531,71 +474,28 @@ Connection: close
|
||||
tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
|
||||
try:
|
||||
self.assertTrue(os.path.exists(tmp_file))
|
||||
with urlopen(tmp_fileurl) as fobj:
|
||||
with urllib.request.urlopen(tmp_fileurl) as fobj:
|
||||
self.assertTrue(fobj)
|
||||
finally:
|
||||
os.close(fd)
|
||||
os.unlink(tmp_file)
|
||||
self.assertFalse(os.path.exists(tmp_file))
|
||||
with self.assertRaises(urllib.error.URLError):
|
||||
urlopen(tmp_fileurl)
|
||||
urllib.request.urlopen(tmp_fileurl)
|
||||
|
||||
def test_ftp_nohost(self):
|
||||
test_ftp_url = 'ftp:///path'
|
||||
with self.assertRaises(urllib.error.URLError) as e:
|
||||
urlopen(test_ftp_url)
|
||||
urllib.request.urlopen(test_ftp_url)
|
||||
self.assertFalse(e.exception.filename)
|
||||
self.assertTrue(e.exception.reason)
|
||||
|
||||
def test_ftp_nonexisting(self):
|
||||
with self.assertRaises(urllib.error.URLError) as e:
|
||||
urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
|
||||
urllib.request.urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
|
||||
self.assertFalse(e.exception.filename)
|
||||
self.assertTrue(e.exception.reason)
|
||||
|
||||
@patch.object(urllib.request, 'MAXFTPCACHE', 0)
|
||||
def test_ftp_cache_pruning(self):
|
||||
self.fakeftp()
|
||||
try:
|
||||
urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
|
||||
urlopen('ftp://localhost')
|
||||
finally:
|
||||
self.unfakeftp()
|
||||
|
||||
def test_userpass_inurl(self):
|
||||
self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
|
||||
try:
|
||||
fp = urlopen("http://user:pass@python.org/")
|
||||
self.assertEqual(fp.readline(), b"Hello!")
|
||||
self.assertEqual(fp.readline(), b"")
|
||||
self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
|
||||
self.assertEqual(fp.getcode(), 200)
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
def test_userpass_inurl_w_spaces(self):
|
||||
self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
|
||||
try:
|
||||
userpass = "a b:c d"
|
||||
url = "http://{}@python.org/".format(userpass)
|
||||
fakehttp_wrapper = http.client.HTTPConnection
|
||||
authorization = ("Authorization: Basic %s\r\n" %
|
||||
b64encode(userpass.encode("ASCII")).decode("ASCII"))
|
||||
fp = urlopen(url)
|
||||
# The authorization header must be in place
|
||||
self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
|
||||
self.assertEqual(fp.readline(), b"Hello!")
|
||||
self.assertEqual(fp.readline(), b"")
|
||||
# the spaces are quoted in URL so no match
|
||||
self.assertNotEqual(fp.geturl(), url)
|
||||
self.assertEqual(fp.getcode(), 200)
|
||||
finally:
|
||||
self.unfakehttp()
|
||||
|
||||
def test_URLopener_deprecation(self):
|
||||
with warnings_helper.check_warnings(('',DeprecationWarning)):
|
||||
urllib.request.URLopener()
|
||||
|
||||
|
||||
class urlopen_DataTests(unittest.TestCase):
|
||||
"""Test urlopen() opening a data URL."""
|
||||
@ -1620,56 +1520,6 @@ class Utility_Tests(unittest.TestCase):
|
||||
self.assertIsInstance(urllib.request.thishost(), tuple)
|
||||
|
||||
|
||||
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
|
||||
"""Testcase to test the open method of URLopener class."""
|
||||
|
||||
def test_quoted_open(self):
|
||||
class DummyURLopener(urllib.request.URLopener):
|
||||
def open_spam(self, url):
|
||||
return url
|
||||
with warnings_helper.check_warnings(
|
||||
('DummyURLopener style of invoking requests is deprecated.',
|
||||
DeprecationWarning)):
|
||||
self.assertEqual(DummyURLopener().open(
|
||||
'spam://example/ /'),'//example/%20/')
|
||||
|
||||
# test the safe characters are not quoted by urlopen
|
||||
self.assertEqual(DummyURLopener().open(
|
||||
"spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
|
||||
"//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
|
||||
|
||||
@warnings_helper.ignore_warnings(category=DeprecationWarning)
|
||||
def test_urlopener_retrieve_file(self):
|
||||
with os_helper.temp_dir() as tmpdir:
|
||||
fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
|
||||
os.close(fd)
|
||||
fileurl = "file:" + urllib.request.pathname2url(tmpfile)
|
||||
filename, _ = urllib.request.URLopener().retrieve(fileurl)
|
||||
# Some buildbots have TEMP folder that uses a lowercase drive letter.
|
||||
self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))
|
||||
|
||||
@warnings_helper.ignore_warnings(category=DeprecationWarning)
|
||||
def test_urlopener_retrieve_remote(self):
|
||||
url = "http://www.python.org/file.txt"
|
||||
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
|
||||
self.addCleanup(self.unfakehttp)
|
||||
filename, _ = urllib.request.URLopener().retrieve(url)
|
||||
self.assertEqual(os.path.splitext(filename)[1], ".txt")
|
||||
|
||||
@warnings_helper.ignore_warnings(category=DeprecationWarning)
|
||||
def test_local_file_open(self):
|
||||
# bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
|
||||
class DummyURLopener(urllib.request.URLopener):
|
||||
def open_local_file(self, url):
|
||||
return url
|
||||
for url in ('local_file://example', 'local-file://example'):
|
||||
self.assertRaises(OSError, urllib.request.urlopen, url)
|
||||
self.assertRaises(OSError, urllib.request.URLopener().open, url)
|
||||
self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
|
||||
self.assertRaises(OSError, DummyURLopener().open, url)
|
||||
self.assertRaises(OSError, DummyURLopener().retrieve, url)
|
||||
|
||||
|
||||
class RequestTests(unittest.TestCase):
|
||||
"""Unit tests for urllib.request.Request."""
|
||||
|
||||
|
@ -5,6 +5,7 @@ from test.support import socket_helper
|
||||
|
||||
import contextlib
|
||||
import socket
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import os
|
||||
@ -101,13 +102,10 @@ class urlopenNetworkTests(unittest.TestCase):
|
||||
# test that opening a nonexistent page reports the 404 error code
|
||||
URL = self.url + "XXXinvalidXXX"
|
||||
with socket_helper.transient_internet(URL):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
open_url = urllib.request.FancyURLopener().open(URL)
|
||||
try:
|
||||
code = open_url.getcode()
|
||||
finally:
|
||||
open_url.close()
|
||||
self.assertEqual(code, 404)
|
||||
with self.assertRaises(urllib.error.URLError) as e:
|
||||
with urllib.request.urlopen(URL):
|
||||
pass
|
||||
self.assertEqual(e.exception.code, 404)
|
||||
|
||||
@support.requires_resource('walltime')
|
||||
def test_bad_address(self):
|
||||
|
@ -83,6 +83,7 @@ f = urllib.request.urlopen('https://www.python.org/')
|
||||
|
||||
import base64
|
||||
import bisect
|
||||
import contextlib
|
||||
import email
|
||||
import hashlib
|
||||
import http.client
|
||||
@ -94,15 +95,13 @@ import string
|
||||
import sys
|
||||
import time
|
||||
import tempfile
|
||||
import contextlib
|
||||
import warnings
|
||||
|
||||
|
||||
from urllib.error import URLError, HTTPError, ContentTooShortError
|
||||
from urllib.parse import (
|
||||
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
|
||||
_splittype, _splithost, _splitport, _splituser, _splitpasswd,
|
||||
_splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
|
||||
_splitattr, _splitvalue, _splittag,
|
||||
unquote_to_bytes, urlunparse)
|
||||
from urllib.response import addinfourl, addclosehook
|
||||
|
||||
@ -128,7 +127,7 @@ __all__ = [
|
||||
'urlopen', 'install_opener', 'build_opener',
|
||||
'pathname2url', 'url2pathname', 'getproxies',
|
||||
# Legacy interface
|
||||
'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
|
||||
'urlretrieve', 'urlcleanup',
|
||||
]
|
||||
|
||||
# used in User-Agent header sent
|
||||
@ -165,8 +164,7 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||
the reason phrase returned by the server --- instead of the response
|
||||
headers as it is specified in the documentation for HTTPResponse.
|
||||
|
||||
For FTP, file, and data URLs and requests explicitly handled by legacy
|
||||
URLopener and FancyURLopener classes, this function returns a
|
||||
For FTP, file, and data URLs, this function returns a
|
||||
urllib.response.addinfourl object.
|
||||
|
||||
Note that None may be returned if no handler handles the request (though
|
||||
@ -940,6 +938,7 @@ class AbstractBasicAuthHandler:
|
||||
for mo in AbstractBasicAuthHandler.rx.finditer(header):
|
||||
scheme, quote, realm = mo.groups()
|
||||
if quote not in ['"', "'"]:
|
||||
import warnings
|
||||
warnings.warn("Basic Auth Realm was unquoted",
|
||||
UserWarning, 3)
|
||||
|
||||
@ -1495,7 +1494,7 @@ class FileHandler(BaseHandler):
|
||||
origurl = 'file://' + filename
|
||||
return addinfourl(open(localfile, 'rb'), headers, origurl)
|
||||
except OSError as exp:
|
||||
raise URLError(exp)
|
||||
raise URLError(exp, exp.filename)
|
||||
raise URLError('file not on local host')
|
||||
|
||||
def _safe_gethostbyname(host):
|
||||
@ -1647,8 +1646,6 @@ class DataHandler(BaseHandler):
|
||||
|
||||
# Code moved from the old urllib module
|
||||
|
||||
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
|
||||
|
||||
# Helper for non-unix systems
|
||||
if os.name == 'nt':
|
||||
from nturl2path import url2pathname, pathname2url
|
||||
@ -1668,678 +1665,6 @@ else:
|
||||
return quote(pathname)
|
||||
|
||||
|
||||
ftpcache = {}
|
||||
|
||||
|
||||
class URLopener:
|
||||
"""Class to open URLs.
|
||||
This is a class rather than just a subroutine because we may need
|
||||
more than one set of global protocol-specific options.
|
||||
Note -- this is a base class for those who don't want the
|
||||
automatic handling of errors type 302 (relocated) and 401
|
||||
(authorization needed)."""
|
||||
|
||||
__tempfiles = None
|
||||
|
||||
version = "Python-urllib/%s" % __version__
|
||||
|
||||
# Constructor
|
||||
def __init__(self, proxies=None, **x509):
|
||||
msg = "%(class)s style of invoking requests is deprecated. " \
|
||||
"Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
|
||||
warnings.warn(msg, DeprecationWarning, stacklevel=3)
|
||||
if proxies is None:
|
||||
proxies = getproxies()
|
||||
assert hasattr(proxies, 'keys'), "proxies must be a mapping"
|
||||
self.proxies = proxies
|
||||
self.key_file = x509.get('key_file')
|
||||
self.cert_file = x509.get('cert_file')
|
||||
self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
|
||||
self.__tempfiles = []
|
||||
self.__unlink = os.unlink # See cleanup()
|
||||
self.tempcache = None
|
||||
# Undocumented feature: if you assign {} to tempcache,
|
||||
# it is used to cache files retrieved with
|
||||
# self.retrieve(). This is not enabled by default
|
||||
# since it does not work for changing documents (and I
|
||||
# haven't got the logic to check expiration headers
|
||||
# yet).
|
||||
self.ftpcache = ftpcache
|
||||
# Undocumented feature: you can use a different
|
||||
# ftp cache by assigning to the .ftpcache member;
|
||||
# in case you want logically independent URL openers
|
||||
# XXX This is not threadsafe. Bah.
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
def close(self):
|
||||
self.cleanup()
|
||||
|
||||
def cleanup(self):
|
||||
# This code sometimes runs when the rest of this module
|
||||
# has already been deleted, so it can't use any globals
|
||||
# or import anything.
|
||||
if self.__tempfiles:
|
||||
for file in self.__tempfiles:
|
||||
try:
|
||||
self.__unlink(file)
|
||||
except OSError:
|
||||
pass
|
||||
del self.__tempfiles[:]
|
||||
if self.tempcache:
|
||||
self.tempcache.clear()
|
||||
|
||||
def addheader(self, *args):
|
||||
"""Add a header to be used by the HTTP interface only
|
||||
e.g. u.addheader('Accept', 'sound/basic')"""
|
||||
self.addheaders.append(args)
|
||||
|
||||
# External interface
|
||||
def open(self, fullurl, data=None):
|
||||
"""Use URLopener().open(file) instead of open(file, 'r')."""
|
||||
fullurl = unwrap(_to_bytes(fullurl))
|
||||
fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
|
||||
if self.tempcache and fullurl in self.tempcache:
|
||||
filename, headers = self.tempcache[fullurl]
|
||||
fp = open(filename, 'rb')
|
||||
return addinfourl(fp, headers, fullurl)
|
||||
urltype, url = _splittype(fullurl)
|
||||
if not urltype:
|
||||
urltype = 'file'
|
||||
if urltype in self.proxies:
|
||||
proxy = self.proxies[urltype]
|
||||
urltype, proxyhost = _splittype(proxy)
|
||||
host, selector = _splithost(proxyhost)
|
||||
url = (host, fullurl) # Signal special case to open_*()
|
||||
else:
|
||||
proxy = None
|
||||
name = 'open_' + urltype
|
||||
self.type = urltype
|
||||
name = name.replace('-', '_')
|
||||
if not hasattr(self, name) or name == 'open_local_file':
|
||||
if proxy:
|
||||
return self.open_unknown_proxy(proxy, fullurl, data)
|
||||
else:
|
||||
return self.open_unknown(fullurl, data)
|
||||
try:
|
||||
if data is None:
|
||||
return getattr(self, name)(url)
|
||||
else:
|
||||
return getattr(self, name)(url, data)
|
||||
except (HTTPError, URLError):
|
||||
raise
|
||||
except OSError as msg:
|
||||
raise OSError('socket error', msg) from msg
|
||||
|
||||
def open_unknown(self, fullurl, data=None):
|
||||
"""Overridable interface to open unknown URL type."""
|
||||
type, url = _splittype(fullurl)
|
||||
raise OSError('url error', 'unknown url type', type)
|
||||
|
||||
def open_unknown_proxy(self, proxy, fullurl, data=None):
|
||||
"""Overridable interface to open unknown URL type."""
|
||||
type, url = _splittype(fullurl)
|
||||
raise OSError('url error', 'invalid proxy for %s' % type, proxy)
|
||||
|
||||
# External interface
|
||||
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """Copy the object at *url* to a local file.

    Returns a (filename, headers) pair.  For a local file URL with no
    explicit *filename* the existing path is returned and nothing is
    copied.  Otherwise the body is written to *filename*, or to a fresh
    temporary file whose suffix is taken from the URL path.

    *reporthook*, when given, is called as reporthook(blocknum, blocksize,
    totalsize) once before the first read and once after every block;
    totalsize is -1 when there is no Content-Length header.

    Raises ContentTooShortError when fewer bytes arrive than the
    Content-Length header announced.
    """
    url = unwrap(_to_bytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]

    scheme, rest = _splittype(url)
    if filename is None and (not scheme or scheme == 'file'):
        # Try the URL as a plain local file first; on failure fall back to
        # the general open() path below.
        try:
            local_fp = self.open_local_file(rest)
            local_headers = local_fp.info()
            local_fp.close()
            return url2pathname(_splithost(rest)[1]), local_headers
        except OSError:
            pass

    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            # Reduce the URL to its bare path so that the temporary file
            # gets the same extension.
            path = _splittype(url)[1]
            path = _splithost(path or "")[1]
            path = _splitquery(path or "")[0]
            path = _splitattr(path or "")[0]
            suffix = os.path.splitext(path)[1]
            fd, filename = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        with tfp:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            if reporthook:
                reporthook(blocknum, bs, size)
            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
    finally:
        fp.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
|
||||
|
||||
# Each method named open_<type> knows how to open that type of URL
|
||||
|
||||
def _open_generic_http(self, connection_factory, url, data):
    """Make an HTTP connection using connection_factory.

    This is an internal method that should be called from
    open_http() or open_https().

    Arguments:
    - connection_factory should take a host name and return an
      HTTPConnection instance.
    - url is the url to retrieve, or a (host, full-url) pair; open()
      passes the pair form when the request goes through a proxy.
    - data is payload for a POST request or None.

    Returns an addinfourl wrapping the response for a 2xx status;
    any other status is handed to self.http_error().

    Raises OSError when no host can be determined and URLError when the
    server sends a bad status line.  If sending the request or reading
    the response fails, the connection is closed before the exception
    propagates.
    """

    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        host, selector = _splithost(url)
        if host:
            user_passwd, host = _splituser(host)
            host = unquote(host)
        realhost = host
    else:
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = _splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = _splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = _splithost(rest)
            if realhost:
                user_passwd, realhost = _splituser(realhost)
            if user_passwd:
                # Rebuild the selector from the credential-free host.
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if proxy_bypass(realhost):
                host = realhost

    if not host:
        raise OSError('http error', 'no host given')

    if proxy_passwd:
        proxy_passwd = unquote(proxy_passwd)
        proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
    else:
        proxy_auth = None

    if user_passwd:
        user_passwd = unquote(user_passwd)
        auth = base64.b64encode(user_passwd.encode()).decode('ascii')
    else:
        auth = None
    http_conn = connection_factory(host)
    headers = {}
    if proxy_auth:
        headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
    if auth:
        headers["Authorization"] = "Basic %s" % auth
    if realhost:
        headers["Host"] = realhost

    # Add Connection:close as we don't support persistent connections yet.
    # This helps in closing the socket and avoiding ResourceWarning

    headers["Connection"] = "close"

    # Caller-supplied headers are applied last and so override the above.
    for header, value in self.addheaders:
        headers[header] = value

    try:
        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)
        response = http_conn.getresponse()
    except http.client.BadStatusLine as exc:
        # something went wrong with the HTTP status line
        http_conn.close()
        raise URLError("http protocol error: bad status line") from exc
    except BaseException:
        # Do not leak the connection when the exchange fails.
        http_conn.close()
        raise

    # According to RFC 2616, "2xx" code indicates that the client's
    # request was successfully received, understood, and accepted.
    if 200 <= response.status < 300:
        return addinfourl(response, response.msg, "http:" + url,
                          response.status)
    else:
        return self.http_error(
            url, response.fp,
            response.status, response.reason, response.msg, data)
|
||||
|
||||
def open_http(self, url, data=None):
    """Open *url* over plain HTTP, using http.client.HTTPConnection."""
    plain_factory = http.client.HTTPConnection
    return self._open_generic_http(plain_factory, url, data)
|
||||
|
||||
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    """Dispatch a non-2xx HTTP response to a handler.

    A method named http_error_DDD (DDD being the numeric status code) is
    tried first; *data* is passed to it only when it is not None.  If no
    such method exists, or it returns a false value, the response goes to
    http_error_default().  Derived classes may override this method or
    supply the specific handlers.
    """
    handler_name = 'http_error_%d' % errcode
    if hasattr(self, handler_name):
        handler = getattr(self, handler_name)
        extra = () if data is None else (data,)
        outcome = handler(url, fp, errcode, errmsg, headers, *extra)
        if outcome:
            return outcome
    return self.http_error_default(url, fp, errcode, errmsg, headers)
|
||||
|
||||
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handler: close the connection and raise HTTPError."""
    fp.close()
    error = HTTPError(url, errcode, errmsg, headers, None)
    raise error
|
||||
|
||||
if _have_ssl:
    def _https_connection(self, host):
        """Return an HTTPSConnection to *host*.

        When self.key_file or self.cert_file is set, a context with that
        certificate chain loaded is used; otherwise context=None is passed.
        """
        tls_context = None
        if self.key_file or self.cert_file:
            protocol_version = http.client.HTTPSConnection._http_vsn
            tls_context = http.client._create_https_context(protocol_version)
            tls_context.load_cert_chain(self.cert_file, self.key_file)
            # cert and key file means the user wants to authenticate.
            # enable TLS 1.3 PHA implicitly even for custom contexts.
            if tls_context.post_handshake_auth is not None:
                tls_context.post_handshake_auth = True
        return http.client.HTTPSConnection(host, context=tls_context)

    def open_https(self, url, data=None):
        """Open *url* over HTTPS, using connections from _https_connection()."""
        secure_factory = self._https_connection
        return self._open_generic_http(secure_factory, url, data)
|
||||
|
||||
def open_file(self, url):
    """Open a file: URL, which must refer to the local machine.

    Raises URLError when called with the (host, url) pair used for
    proxied requests, and ValueError when the URL names a host other
    than localhost.
    """
    if not isinstance(url, str):
        raise URLError('file error: proxy support for file protocol currently not implemented')
    # "//host/..." carries an authority component, "///..." an empty one.
    names_a_host = url[:2] == '//' and url[2:3] != '/'
    if names_a_host and url[2:12].lower() != 'localhost/':
        raise ValueError("file:// scheme is supported only on localhost")
    return self.open_local_file(url)
|
||||
|
||||
def open_local_file(self, url):
    """Open the local file named by *url* and synthesise its headers.

    Content-Type, Content-Length and Last-modified are derived from the
    file itself.  A host part is accepted only when it carries no port
    and resolves to this machine.

    Raises URLError when the file cannot be stat'ed or the host is not
    local, and ValueError for a './'-relative path that follows a host.
    """
    import email.utils
    import mimetypes

    host, file = _splithost(url)
    localname = url2pathname(file)
    try:
        stats = os.stat(localname)
    except OSError as e:
        raise URLError(e.strerror, e.filename)

    content_type = mimetypes.guess_type(url)[0] or 'text/plain'
    last_modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    headers = email.message_from_string(
        'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
        (content_type, stats.st_size, last_modified))

    if not host:
        urlfile = 'file://' + file if file[:1] == '/' else file
        return addinfourl(open(localname, 'rb'), headers, urlfile)

    host, port = _splitport(host)
    is_this_machine = (
        not port
        and socket.gethostbyname(host) in ((localhost(),) + thishost()))
    if not is_this_machine:
        raise URLError('local file error: not on local host')

    if file[:1] == '/':
        urlfile = 'file://' + file
    elif file[:2] == './':
        raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
    else:
        urlfile = file
    return addinfourl(open(localname, 'rb'), headers, urlfile)
|
||||
|
||||
def open_ftp(self, url):
    """Fetch *url* over FTP, reusing cached connections from self.ftpcache.

    Credentials, host and port come from the network location.  The
    transfer type defaults to 'D' when the path has no final file name
    and to 'I' otherwise; a ';type=' attribute on the path can override
    it.

    Raises URLError when called with the (host, url) pair used for
    proxied requests, when no host is given, or when the FTP layer fails.
    """
    if not isinstance(url, str):
        raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
    import mimetypes

    host, path = _splithost(url)
    if not host:
        raise URLError('ftp error: no host given')
    host, port = _splitport(host)
    user, host = _splituser(host)
    passwd = None
    if user:
        user, passwd = _splitpasswd(user)
    host = unquote(host)
    user = unquote(user or '')
    passwd = unquote(passwd or '')
    host = socket.gethostbyname(host)
    if port:
        port = int(port)
    else:
        import ftplib
        port = ftplib.FTP_PORT

    path, attrs = _splitattr(path)
    *dirs, file = unquote(path).split('/')
    if dirs and not dirs[0]:
        dirs = dirs[1:]
    if dirs and not dirs[0]:
        dirs[0] = '/'
    key = user, host, port, '/'.join(dirs)

    # XXX thread unsafe!
    if len(self.ftpcache) > MAXFTPCACHE:
        # Prune the cache, rather arbitrarily
        for stale_key in list(self.ftpcache):
            if stale_key != key:
                self.ftpcache.pop(stale_key).close()

    try:
        if key not in self.ftpcache:
            self.ftpcache[key] = ftpwrapper(user, passwd, host, port, dirs)
        transfer_type = 'I' if file else 'D'
        for attr in attrs:
            attr_name, attr_value = _splitvalue(attr)
            if attr_name.lower() == 'type' and \
               attr_value in ('a', 'A', 'i', 'I', 'd', 'D'):
                transfer_type = attr_value.upper()
        fp, retrlen = self.ftpcache[key].retrfile(file, transfer_type)

        header_lines = ""
        mtype = mimetypes.guess_type("ftp:" + url)[0]
        if mtype:
            header_lines += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            header_lines += "Content-Length: %d\n" % retrlen
        headers = email.message_from_string(header_lines)
        return addinfourl(fp, headers, "ftp:" + url)
    except ftperrors() as exp:
        raise URLError(f'ftp error: {exp}') from exp
|
||||
|
||||
def open_data(self, url, data=None):
    """Decode a "data" URL into a file-like response.

    The *data* argument (a POST payload) is ignored.  The result wraps a
    text stream holding synthesised Date, Content-type and Content-Length
    headers, a blank line, and then the decoded payload.

    Raises URLError when called with the (host, url) pair used for
    proxied requests, and OSError when the URL contains no comma.
    """
    if not isinstance(url, str):
        raise URLError('data error: proxy support for data protocol currently not implemented')
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    try:
        mediatype, payload = url.split(',', 1)
    except ValueError:
        raise OSError('data error', 'bad data URL')
    mediatype = mediatype or 'text/plain;charset=US-ASCII'

    # A trailing ";token" with no "=" is the encoding marker, not a
    # media-type parameter.
    encoding = ''
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]

    date_line = 'Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                        time.gmtime(time.time()))
    if encoding == 'base64':
        # XXX is this encoding/decoding ok?
        payload = base64.decodebytes(payload.encode('ascii')).decode('latin-1')
    else:
        payload = unquote(payload)

    msg = '\n'.join([
        date_line,
        'Content-type: %s' % mediatype,
        'Content-Length: %d' % len(payload),
        '',
        payload,
    ])
    headers = email.message_from_string(msg)
    stream = io.StringIO(msg)
    #f.fileno = None     # needed for addinfourl
    return addinfourl(stream, headers, url)
|
||||
|
||||
|
||||
class FancyURLopener(URLopener):
    """URLopener variant that follows redirects and answers Basic auth.

    Status codes without a dedicated handler are returned to the caller
    as an ordinary response object instead of raising.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base opener plus auth-cache and redirect state."""
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" to a (user, passwd) pair.
        self.auth_cache = {}
        # Depth of the redirect chain currently being followed.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless the chain has reached self.maxtries
        hops, in which case the response is reported as a 500.  The hop
        counter is reset whenever this method exits.
        """
        self.tries += 1
        try:
            too_many = self.maxtries and self.tries >= self.maxtries
            if not too_many:
                return self.redirect_internal(url, fp, errcode, errmsg,
                                              headers, data)
            if hasattr(self, "http_error_500"):
                handler = self.http_error_500
            else:
                handler = self.http_error_default
            return handler(url, fp, 500,
                           "Internal Server Error: Redirect Recursion",
                           headers)
        finally:
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        Returns None when neither a 'location' nor a 'uri' header is
        present.  Raises HTTPError when the target's scheme is not
        http, https, ftp or empty.
        """
        for header_name in ('location', 'uri'):
            if header_name in headers:
                newurl = headers[header_name]
                break
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        target_scheme = urlparse(newurl).scheme
        if target_scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 308 -- relocated, but turn POST into error."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_301(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        def give_up():
            # The base-class handler closes fp and raises HTTPError.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)

        if 'www-authenticate' not in headers:
            give_up()
        challenge = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not match:
            give_up()
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            give_up()
        if not retry:
            give_up()
        retry_method = getattr(self, 'retry_' + self.type + '_basic_auth')
        if data is None:
            return retry_method(url, realm)
        return retry_method(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        def give_up():
            # The base-class handler closes fp and raises HTTPError.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)

        if 'proxy-authenticate' not in headers:
            give_up()
        challenge = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not match:
            give_up()
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            give_up()
        if not retry:
            give_up()
        retry_method = getattr(self,
                               'retry_proxy_' + self.type + '_basic_auth')
        if data is None:
            return retry_method(url, realm)
        return retry_method(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Returns None when no user name or password is obtained.
        """
        host, selector = _splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        proxy_rest = _splittype(proxy)[1]
        proxyhost, proxyselector = _splithost(proxy_rest)
        # Non-zero when the proxy URL already carried credentials; the
        # cached pair is then discarded by get_user_passwd().
        at_end = proxyhost.find('@') + 1
        proxyhost = proxyhost[at_end:]
        user, passwd = self.get_user_passwd(proxyhost, realm, at_end)
        if not (user or passwd):
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Returns None when no user name or password is obtained.
        """
        host, selector = _splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        proxy_rest = _splittype(proxy)[1]
        proxyhost, proxyselector = _splithost(proxy_rest)
        # Non-zero when the proxy URL already carried credentials; the
        # cached pair is then discarded by get_user_passwd().
        at_end = proxyhost.find('@') + 1
        proxyhost = proxyhost[at_end:]
        user, passwd = self.get_user_passwd(proxyhost, realm, at_end)
        if not (user or passwd):
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials embedded in the URL.

        Returns None when no user name or password is obtained.
        """
        host, selector = _splithost(url)
        # Non-zero when the URL already carried credentials; the cached
        # pair is then discarded by get_user_passwd().
        at_end = host.find('@') + 1
        host = host[at_end:]
        user, passwd = self.get_user_passwd(host, realm, at_end)
        if not (user or passwd):
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials embedded in the URL.

        Returns None when no user name or password is obtained.
        """
        host, selector = _splithost(url)
        # Non-zero when the URL already carried credentials; the cached
        # pair is then discarded by get_user_passwd().
        at_end = host.find('@') + 1
        host = host[at_end:]
        user, passwd = self.get_user_passwd(host, realm, at_end)
        if not (user or passwd):
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for *realm* at *host*, prompting if needed.

        A cached pair is returned unless *clear_cache* is true, in which
        case it is dropped and the user is prompted again.  A non-empty
        answer is stored in the cache.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[key]
            del self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Ask for credentials on the terminal; override in a GUI environment!

        Returns (None, None) when the prompt is interrupted.
        """
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
        except KeyboardInterrupt:
            print()
            return None, None
        return user, passwd
|
||||
|
||||
|
||||
# Utility functions
|
||||
|
||||
_localhost = None
|
||||
@ -2485,9 +1810,7 @@ def getproxies_environment():
|
||||
"""Return a dictionary of scheme -> proxy server URL mappings.
|
||||
|
||||
Scan the environment for variables named <scheme>_proxy;
|
||||
this seems to be the standard convention. If you need a
|
||||
different way, you can pass a proxies dictionary to the
|
||||
[Fancy]URLopener constructor.
|
||||
this seems to be the standard convention.
|
||||
"""
|
||||
# in order to prefer lowercase variables, process environment in
|
||||
# two passes: first matches any, second pass matches lowercase only
|
||||
|
@ -0,0 +1,3 @@
|
||||
Remove :class:`!URLopener` and :class:`!FancyURLopener` classes from
|
||||
:mod:`urllib.request`. They had previously raised :exc:`DeprecationWarning`
|
||||
since Python 3.3.
|
Loading…
Reference in New Issue
Block a user