mirror of
https://github.com/python/cpython.git
synced 2024-12-26 10:14:58 +08:00
47b49bf6dc
svn+ssh://pythondev@svn.python.org/python/trunk ........ r57771 | thomas.wouters | 2007-08-30 23:54:39 +0200 (Thu, 30 Aug 2007) | 5 lines Don't lie in __all__ attributes when SSL is not available: only add the SSL classes when they are actually created. ........ r57620 | walter.doerwald | 2007-08-28 18:38:26 +0200 (Tue, 28 Aug 2007) | 5 lines Fix title endtag in HTMLCalender.formatyearpage(). Fix documentation for HTMLCalender.formatyearpage() (there's no themonth parameter). This fixes issue1046. ........ r57622 | georg.brandl | 2007-08-28 20:54:44 +0200 (Tue, 28 Aug 2007) | 2 lines Add a crasher for the thread-unsafety of file objects. ........ r57626 | skip.montanaro | 2007-08-29 01:22:52 +0200 (Wed, 29 Aug 2007) | 1 line fixes 813986 ........ r57628 | walter.doerwald | 2007-08-29 01:35:33 +0200 (Wed, 29 Aug 2007) | 2 lines Fix test output. ........ r57631 | skip.montanaro | 2007-08-29 03:24:11 +0200 (Wed, 29 Aug 2007) | 2 lines Install pygettext (once the scriptsinstall target is working again). ........ r57633 | skip.montanaro | 2007-08-29 03:33:45 +0200 (Wed, 29 Aug 2007) | 2 lines Recent items. ........ r57650 | neal.norwitz | 2007-08-29 08:15:33 +0200 (Wed, 29 Aug 2007) | 1 line Add Bill as a developer ........ r57651 | facundo.batista | 2007-08-29 12:28:28 +0200 (Wed, 29 Aug 2007) | 5 lines Ignore test failures caused by 'resource temporarily unavailable' exceptions raised during FailingServerTestCase tests. [GSoC - Alan McIntyre] ........ r57680 | bill.janssen | 2007-08-30 00:35:05 +0200 (Thu, 30 Aug 2007) | 17 lines This contains a number of things: 1) Improve the documentation of the SSL module, with a fuller explanation of certificate usage, another reference, proper formatting of this and that. 2) Fix Windows bug in ssl.py, and general bug in sslsocket.close(). Remove some unused code from ssl.py. Allow accept() to be called on sslsocket sockets. 3) Use try-except-else in import of ssl in socket.py. Deprecate use of socket.ssl(). 
4) Remove use of socket.ssl() in every library module, except for test_socket_ssl.py and test_ssl.py. ........ r57714 | georg.brandl | 2007-08-30 12:09:42 +0200 (Thu, 30 Aug 2007) | 2 lines Stronger urge to convert filenames to str before using them as argument to ZipFile.write(). ........ r57716 | georg.brandl | 2007-08-30 12:38:56 +0200 (Thu, 30 Aug 2007) | 2 lines Patch #1680959: add test suite for pipes module. ........ r57717 | georg.brandl | 2007-08-30 14:32:23 +0200 (Thu, 30 Aug 2007) | 3 lines * Skip test_pipes on non-POSIX. * Don't raise TestSkipped within a test function. ........ r57723 | mark.summerfield | 2007-08-30 17:03:03 +0200 (Thu, 30 Aug 2007) | 3 lines Added more cross-references. ........ r57726 | walter.doerwald | 2007-08-30 17:30:09 +0200 (Thu, 30 Aug 2007) | 2 lines Rewrap line. ........ r57727 | walter.doerwald | 2007-08-30 17:34:55 +0200 (Thu, 30 Aug 2007) | 2 lines Set startinpos before calling the error handler. ........ r57730 | bill.janssen | 2007-08-30 19:07:28 +0200 (Thu, 30 Aug 2007) | 3 lines Added docstrings to methods and functions. ........ r57743 | bill.janssen | 2007-08-30 20:08:06 +0200 (Thu, 30 Aug 2007) | 1 line added note on new ssl module and deprecation of socket.ssl ........ r57747 | martin.v.loewis | 2007-08-30 20:14:01 +0200 (Thu, 30 Aug 2007) | 1 line Fix popen usage. ........ r57748 | martin.v.loewis | 2007-08-30 20:15:22 +0200 (Thu, 30 Aug 2007) | 1 line Fix typo. ........ r57750 | martin.v.loewis | 2007-08-30 20:25:47 +0200 (Thu, 30 Aug 2007) | 1 line Bug #1746880: Correctly install DLLs into system32 folder on Win64. ........ r57760 | martin.v.loewis | 2007-08-30 21:04:09 +0200 (Thu, 30 Aug 2007) | 1 line Bug #1709599: Run test_1565150 only if the file system is NTFS. ........ r57762 | martin.v.loewis | 2007-08-30 22:10:57 +0200 (Thu, 30 Aug 2007) | 2 lines Bump autoconf minimum version to 2.61. ........ 
r57764 | lars.gustaebel | 2007-08-30 22:24:31 +0200 (Thu, 30 Aug 2007) | 2 lines Warn about possible risks when extracting untrusted archives. ........ r57769 | thomas.wouters | 2007-08-30 23:01:17 +0200 (Thu, 30 Aug 2007) | 7 lines Somewhat-preliminary slice-object and extended slicing support for ctypes. The exact behaviour of omitted and negative indices for the Pointer type may need a closer look (especially as it's subtly different from simple slices) but there's time yet before 2.6, and not enough before 3.0a1 :-) ........
155 lines
3.5 KiB
Python
155 lines
3.5 KiB
Python
import unittest, robotparser
|
|
import io
|
|
from test import test_support
|
|
|
|
class RobotTestCase(unittest.TestCase):
    """Check a single URL against an already-parsed robots.txt.

    index  -- number of the robots.txt example; only used in the label
    parser -- object providing can_fetch(agent, url)
    url    -- either a path string, checked on behalf of ``agent``, or an
              (agent, url) tuple that overrides the agent for this check
    good   -- true if the URL must be fetchable, false if it must be denied
    agent  -- user-agent used when ``url`` is a plain string
    """

    # Instances need constructor arguments, so name-based collectors
    # (pytest, nose) must not try to instantiate this class themselves.
    __test__ = False

    def __init__(self, index, parser, url, good, agent):
        unittest.TestCase.__init__(self)
        # Label returned by __str__.
        verdict = "good" if good else "bad"
        self.str = "RobotTest(%d, %s, %s)" % (index, verdict, url)
        self.parser = parser
        self.url = url
        self.good = good
        self.agent = agent

    def runTest(self):
        """Assert that can_fetch() agrees with the expected verdict."""
        if isinstance(self.url, tuple):
            agent, url = self.url
        else:
            url = self.url
            agent = self.agent
        # assertTrue/assertFalse instead of the failUnless/failIf aliases,
        # which newer unittest versions no longer provide.
        if self.good:
            self.assertTrue(self.parser.can_fetch(agent, url))
        else:
            self.assertFalse(self.parser.can_fetch(agent, url))

    def __str__(self):
        return self.str
|
|
|
|
# Suite collecting every RobotTestCase registered through RobotTest().
tests = unittest.TestSuite()

def RobotTest(index, robots_txt, good_urls, bad_urls,
              agent="test_robotparser"):
    """Parse robots_txt once and queue one RobotTestCase per URL.

    URLs from good_urls are registered as expected-allowed, URLs from
    bad_urls as expected-denied; all cases share the same parser and
    are appended to the module-level ``tests`` suite.
    """
    parser = robotparser.RobotFileParser()
    parser.parse(io.StringIO(robots_txt).readlines())
    # Good URLs are queued before bad ones, each flagged 1 or 0.
    for expected, urls in ((1, good_urls), (0, bad_urls)):
        for url in urls:
            tests.addTest(RobotTestCase(index, parser, url, expected, agent))
|
|
|
|
# Examples from http://www.robotstxt.org/wc/norobots.html (fetched 2002)
|
|
|
|
# 1. Wildcard record with three Disallow rules, two with trailing comments.
doc = """
User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
"""

good = ['/', '/test.html']
bad = ['/cyberworld/map/index.html', '/tmp/xxx', '/foo.html']

RobotTest(1, doc, good, bad)
|
|
|
|
# 2. A wildcard record plus a record for the "cybermapper" agent whose
#    Disallow value is empty.
doc = """
# robots.txt for http://www.example.com/

User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:

"""

good = [
    '/',
    '/test.html',
    ('cybermapper', '/cyberworld/map/index.html'),
]
bad = ['/cyberworld/map/index.html']

RobotTest(2, doc, good, bad)
|
|
|
|
# 3. Everything is disallowed for every agent.
doc = """
# go away
User-agent: *
Disallow: /
"""

good = []
bad = ['/cyberworld/map/index.html', '/', '/tmp/']

RobotTest(3, doc, good, bad)
|
|
|
|
# Examples from http://www.robotstxt.org/wc/norobots-rfc.html (fetched 2002)
|
|
|
|
# 4. and 5. The same "figtree" record, queried first with the bare agent
#    name and then with a longer user-agent string.
doc = """
User-agent: figtree
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
"""

good = []  # XFAIL '/a/b.html'
bad = [
    '/tmp', '/tmp.html', '/tmp/a.html',
    '/a%3cd.html', '/a%3Cd.html', '/a%2fb.html',
    '/~joe/index.html',
]

RobotTest(4, doc, good, bad, 'figtree')
RobotTest(5, doc, good, bad, 'FigTree Robot libwww-perl/5.04')
|
|
|
|
# 6. Wildcard record; checked with the default agent.
doc = """
User-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
"""

good = ['/tmp']  # XFAIL: '/a%2fb.html'
bad = [
    '/tmp/', '/tmp/a.html',
    '/a%3cd.html', '/a%3Cd.html', "/a/b.html",
    '/%7Ejoe/index.html',
]

RobotTest(6, doc, good, bad)
|
|
|
|
# From bug report #523041
|
|
|
|
# 7. A single "Disallow: /." rule under a wildcard record.
doc = """
User-Agent: *
Disallow: /.
"""

good = ['/foo.html']
# Bug report says "/" should be denied, but that is not in the RFC
bad = []

RobotTest(7, doc, good, bad)
|
|
|
|
class TestCase(unittest.TestCase):
    """Network test: query robots.txt rules of a password-protected site."""

    def runTest(self):
        # Needs the 'network' test resource.
        test_support.requires('network')
        # whole site is password-protected.
        site = 'http://mueblesmoraleda.com'
        robots = robotparser.RobotFileParser()
        robots.set_url(site)
        robots.read()
        target = site + "/robots.txt"
        # The parser is expected to deny fetching for any agent.
        self.assertEqual(robots.can_fetch("*", target), False)
|
|
|
|
def test_main():
    """Run the collected robots.txt cases, then the network test."""
    test_support.run_unittest(tests)
    # NOTE(review): run() is called without a result object and nothing
    # it produces is inspected here, so the outcome of the network test
    # is not reported by this function.
    network_case = TestCase()
    network_case.run()
|
|
|
|
if __name__=='__main__':
    # Force verbose output when the file is run directly as a script.
    # The support module's verbosity flag is spelled in lowercase.
    test_support.verbose = 1
    test_main()
|