mirror of
https://github.com/python/cpython.git
synced 2025-01-13 03:55:03 +08:00
318909b297
'nonnumeric port'. It parses to a host name of "http:" which is equivalent to http:80.
354 lines
15 KiB
Python
354 lines
15 KiB
Python
"""Tests for the packaging.pypi.simple module."""
|
|
import re
|
|
import os
|
|
import sys
|
|
import http.client
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
|
|
from packaging.pypi.simple import Crawler
|
|
|
|
from packaging.tests import unittest
|
|
from packaging.tests.support import (TempdirManager, LoggingCatcher,
|
|
fake_dec)
|
|
|
|
try:
|
|
import _thread
|
|
from packaging.tests.pypi_server import (use_pypi_server, PyPIServer,
|
|
PYPI_DEFAULT_STATIC_PATH)
|
|
except ImportError:
|
|
_thread = None
|
|
use_pypi_server = fake_dec
|
|
PYPI_DEFAULT_STATIC_PATH = os.path.join(
|
|
os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
|
|
|
|
|
|
|
|
class SimpleCrawlerTestCase(TempdirManager,
|
|
LoggingCatcher,
|
|
unittest.TestCase):
|
|
|
|
def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
|
|
*args, **kwargs):
|
|
"""Build and return a SimpleIndex with the test server urls"""
|
|
if hosts is None:
|
|
hosts = (server.full_address.replace("http://", ""),)
|
|
kwargs['hosts'] = hosts
|
|
return Crawler(server.full_address + base_url, *args,
|
|
**kwargs)
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
@use_pypi_server()
|
|
def test_bad_urls(self, server):
|
|
crawler = Crawler()
|
|
url = 'http://127.0.0.1:0/nonesuch/test_simple'
|
|
try:
|
|
v = crawler._open_url(url)
|
|
except Exception as v:
|
|
self.assertIn(url, str(v))
|
|
else:
|
|
v.close()
|
|
self.assertIsInstance(v, urllib.error.HTTPError)
|
|
|
|
# issue 16
|
|
# easy_install inquant.contentmirror.plone breaks because of a typo
|
|
# in its home URL
|
|
crawler = Crawler(hosts=('example.org',))
|
|
url = ('url:%20https://svn.plone.org/svn/collective/'
|
|
'inquant.contentmirror.plone/trunk')
|
|
try:
|
|
v = crawler._open_url(url)
|
|
except Exception as v:
|
|
self.assertIn(url, str(v))
|
|
else:
|
|
v.close()
|
|
self.assertIsInstance(v, urllib.error.HTTPError)
|
|
|
|
def _urlopen(*args):
|
|
raise http.client.BadStatusLine('line')
|
|
|
|
old_urlopen = urllib.request.urlopen
|
|
urllib.request.urlopen = _urlopen
|
|
url = 'http://example.org'
|
|
try:
|
|
v = crawler._open_url(url)
|
|
except Exception as v:
|
|
self.assertIn('line', str(v))
|
|
else:
|
|
v.close()
|
|
# TODO use self.assertRaises
|
|
raise AssertionError('Should have raise here!')
|
|
finally:
|
|
urllib.request.urlopen = old_urlopen
|
|
|
|
# issue 20
|
|
url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
|
|
try:
|
|
crawler._open_url(url)
|
|
except Exception as v:
|
|
self.assertIn('Download error', str(v))
|
|
|
|
# issue #160
|
|
url = server.full_address
|
|
page = ('<a href="http://www.famfamfam.com]('
|
|
'http://www.famfamfam.com/">')
|
|
crawler._process_url(url, page)
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
@use_pypi_server("test_found_links")
|
|
def test_found_links(self, server):
|
|
# Browse the index, asking for a specified release version
|
|
# The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
|
|
crawler = self._get_simple_crawler(server)
|
|
last_release = crawler.get_release("foobar")
|
|
|
|
# we have scanned the index page
|
|
self.assertIn(server.full_address + "/simple/foobar/",
|
|
crawler._processed_urls)
|
|
|
|
# we have found 4 releases in this page
|
|
self.assertEqual(len(crawler._projects["foobar"]), 4)
|
|
|
|
# and returned the most recent one
|
|
self.assertEqual("%s" % last_release.version, '2.0.1')
|
|
|
|
def test_is_browsable(self):
|
|
crawler = Crawler(follow_externals=False)
|
|
self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
|
|
|
|
# Now, when following externals, we can have a list of hosts to trust.
|
|
# and don't follow other external links than the one described here.
|
|
crawler = Crawler(hosts=["pypi.python.org", "example.org"],
|
|
follow_externals=True)
|
|
good_urls = (
|
|
"http://pypi.python.org/foo/bar",
|
|
"http://pypi.python.org/simple/foobar",
|
|
"http://example.org",
|
|
"http://example.org/",
|
|
"http://example.org/simple/",
|
|
)
|
|
bad_urls = (
|
|
"http://python.org",
|
|
"http://example.tld",
|
|
)
|
|
|
|
for url in good_urls:
|
|
self.assertTrue(crawler._is_browsable(url))
|
|
|
|
for url in bad_urls:
|
|
self.assertFalse(crawler._is_browsable(url))
|
|
|
|
# allow all hosts
|
|
crawler = Crawler(follow_externals=True, hosts=("*",))
|
|
self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
|
|
self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
|
|
|
|
# specify a list of hosts we want to allow
|
|
crawler = Crawler(follow_externals=True,
|
|
hosts=("*.example.org",))
|
|
self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
|
|
self.assertTrue(
|
|
crawler._is_browsable("http://pypi.example.org/a/path"))
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
@use_pypi_server("with_externals")
|
|
def test_follow_externals(self, server):
|
|
# Include external pages
|
|
# Try to request the package index, wich contains links to "externals"
|
|
# resources. They have to be scanned too.
|
|
crawler = self._get_simple_crawler(server, follow_externals=True)
|
|
crawler.get_release("foobar")
|
|
self.assertIn(server.full_address + "/external/external.html",
|
|
crawler._processed_urls)
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
@use_pypi_server("with_real_externals")
|
|
def test_restrict_hosts(self, server):
|
|
# Only use a list of allowed hosts is possible
|
|
# Test that telling the simple pyPI client to not retrieve external
|
|
# works
|
|
crawler = self._get_simple_crawler(server, follow_externals=False)
|
|
crawler.get_release("foobar")
|
|
self.assertNotIn(server.full_address + "/external/external.html",
|
|
crawler._processed_urls)
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
@use_pypi_server(static_filesystem_paths=["with_externals"],
|
|
static_uri_paths=["simple", "external"])
|
|
def test_links_priority(self, server):
|
|
# Download links from the pypi simple index should be used before
|
|
# external download links.
|
|
# http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
|
|
#
|
|
# Usecase :
|
|
# - someone uploads a package on pypi, a md5 is generated
|
|
# - someone manually coindexes this link (with the md5 in the url) onto
|
|
# an external page accessible from the package page.
|
|
# - someone reuploads the package (with a different md5)
|
|
# - while easy_installing, an MD5 error occurs because the external
|
|
# link is used
|
|
# -> The index should use the link from pypi, not the external one.
|
|
|
|
# start an index server
|
|
index_url = server.full_address + '/simple/'
|
|
|
|
# scan a test index
|
|
crawler = Crawler(index_url, follow_externals=True)
|
|
releases = crawler.get_releases("foobar")
|
|
server.stop()
|
|
|
|
# we have only one link, because links are compared without md5
|
|
self.assertEqual(1, len(releases))
|
|
self.assertEqual(1, len(releases[0].dists))
|
|
# the link should be from the index
|
|
self.assertEqual(2, len(releases[0].dists['sdist'].urls))
|
|
self.assertEqual('12345678901234567',
|
|
releases[0].dists['sdist'].url['hashval'])
|
|
self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
@use_pypi_server(static_filesystem_paths=["with_norel_links"],
|
|
static_uri_paths=["simple", "external"])
|
|
def test_not_scan_all_links(self, server):
|
|
# Do not follow all index page links.
|
|
# The links not tagged with rel="download" and rel="homepage" have
|
|
# to not be processed by the package index, while processing "pages".
|
|
|
|
# process the pages
|
|
crawler = self._get_simple_crawler(server, follow_externals=True)
|
|
crawler.get_releases("foobar")
|
|
# now it should have processed only pages with links rel="download"
|
|
# and rel="homepage"
|
|
self.assertIn("%s/simple/foobar/" % server.full_address,
|
|
crawler._processed_urls) # it's the simple index page
|
|
self.assertIn("%s/external/homepage.html" % server.full_address,
|
|
crawler._processed_urls) # the external homepage is rel="homepage"
|
|
self.assertNotIn("%s/external/nonrel.html" % server.full_address,
|
|
crawler._processed_urls) # this link contains no rel=*
|
|
self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
|
|
crawler._processed_urls) # linked from simple index (no rel)
|
|
self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
|
|
crawler._processed_urls) # linked from simple index (rel)
|
|
self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
|
|
crawler._processed_urls) # linked from external homepage (rel)
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
def test_uses_mirrors(self):
|
|
# When the main repository seems down, try using the given mirrors"""
|
|
server = PyPIServer("foo_bar_baz")
|
|
mirror = PyPIServer("foo_bar_baz")
|
|
mirror.start() # we dont start the server here
|
|
|
|
try:
|
|
# create the index using both servers
|
|
crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
|
|
# set the timeout to 1s for the tests
|
|
timeout=1, mirrors=[mirror.full_address])
|
|
|
|
# this should not raise a timeout
|
|
self.assertEqual(4, len(crawler.get_releases("foo")))
|
|
finally:
|
|
mirror.stop()
|
|
server.stop()
|
|
|
|
def test_simple_link_matcher(self):
|
|
# Test that the simple link matcher finds the right links"""
|
|
crawler = Crawler(follow_externals=False)
|
|
|
|
# Here, we define:
|
|
# 1. one link that must be followed, cause it's a download one
|
|
# 2. one link that must *not* be followed, cause the is_browsable
|
|
# returns false for it.
|
|
# 3. one link that must be followed cause it's a homepage that is
|
|
# browsable
|
|
# 4. one link that must be followed, because it contain a md5 hash
|
|
self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
|
|
self.assertFalse(crawler._is_browsable("http://dl-link2"))
|
|
content = """
|
|
<a href="http://dl-link1" rel="download">download_link1</a>
|
|
<a href="http://dl-link2" rel="homepage">homepage_link1</a>
|
|
<a href="%(index_url)stest" rel="homepage">homepage_link2</a>
|
|
<a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
|
|
""" % {'index_url': crawler.index_url}
|
|
|
|
# Test that the simple link matcher yield the good links.
|
|
generator = crawler._simple_link_matcher(content, crawler.index_url)
|
|
self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
|
|
crawler.index_url, True), next(generator))
|
|
self.assertEqual(('http://dl-link1', True), next(generator))
|
|
self.assertEqual(('%stest' % crawler.index_url, False),
|
|
next(generator))
|
|
self.assertRaises(StopIteration, generator.__next__)
|
|
|
|
# Follow the external links is possible (eg. homepages)
|
|
crawler.follow_externals = True
|
|
generator = crawler._simple_link_matcher(content, crawler.index_url)
|
|
self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
|
|
crawler.index_url, True), next(generator))
|
|
self.assertEqual(('http://dl-link1', True), next(generator))
|
|
self.assertEqual(('http://dl-link2', False), next(generator))
|
|
self.assertEqual(('%stest' % crawler.index_url, False),
|
|
next(generator))
|
|
self.assertRaises(StopIteration, generator.__next__)
|
|
|
|
def test_browse_local_files(self):
|
|
# Test that we can browse local files"""
|
|
index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
|
|
if sys.platform == 'win32':
|
|
# under windows the correct syntax is:
|
|
# file:///C|\the\path\here
|
|
# instead of
|
|
# file://C:\the\path\here
|
|
fix = re.compile(r'^(file://)([A-Za-z])(:)')
|
|
index_url = fix.sub('\\1/\\2|', index_url)
|
|
|
|
index_path = os.sep.join([index_url, "test_found_links", "simple"])
|
|
crawler = Crawler(index_path)
|
|
dists = crawler.get_releases("foobar")
|
|
self.assertEqual(4, len(dists))
|
|
|
|
def test_get_link_matcher(self):
|
|
crawler = Crawler("http://example.org")
|
|
self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
|
|
"http://example.org/some/file").__name__)
|
|
self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
|
|
"http://other-url").__name__)
|
|
|
|
def test_default_link_matcher(self):
|
|
crawler = Crawler("http://example.org", mirrors=[])
|
|
crawler.follow_externals = True
|
|
crawler._is_browsable = lambda *args: True
|
|
base_url = "http://example.org/some/file/"
|
|
content = """
|
|
<a href="../homepage" rel="homepage">link</a>
|
|
<a href="../download" rel="download">link2</a>
|
|
<a href="../simpleurl">link2</a>
|
|
"""
|
|
found_links = set(uri for uri, _ in
|
|
crawler._default_link_matcher(content, base_url))
|
|
self.assertIn('http://example.org/some/homepage', found_links)
|
|
self.assertIn('http://example.org/some/simpleurl', found_links)
|
|
self.assertIn('http://example.org/some/download', found_links)
|
|
|
|
@unittest.skipIf(_thread is None, 'needs threads')
|
|
@use_pypi_server("project_list")
|
|
def test_search_projects(self, server):
|
|
# we can search the index for some projects, on their names
|
|
# the case used no matters here
|
|
crawler = self._get_simple_crawler(server)
|
|
tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
|
|
('foobar*', ['FooBar-bar', 'Foobar-baz']),
|
|
('*foobar', ['Baz-FooBar']))
|
|
|
|
for search, expected in tests:
|
|
projects = [p.name for p in crawler.search_projects(search)]
|
|
self.assertListEqual(expected, projects)
|
|
|
|
|
|
def test_suite():
|
|
return unittest.makeSuite(SimpleCrawlerTestCase)
|
|
|
|
if __name__ == '__main__':
|
|
unittest.main(defaultTest="test_suite")
|