mirror of https://github.com/python/cpython
Issue #28151: Merge from 3.6
commit 9ef04eba28
@@ -1,4 +1,5 @@
 import io
+import os
 import unittest
 import urllib.robotparser
 from collections import namedtuple
@@ -272,14 +273,42 @@ class PasswordProtectedSiteTestCase(unittest.TestCase):
 
 class NetworkTestCase(unittest.TestCase):
 
-    def testPythonOrg(self):
+    base_url = 'http://www.pythontest.net/'
+    robots_txt = '{}elsewhere/robots.txt'.format(base_url)
+
+    @classmethod
+    def setUpClass(cls):
         support.requires('network')
-        with support.transient_internet('www.python.org'):
-            parser = urllib.robotparser.RobotFileParser(
-                "http://www.python.org/robots.txt")
-            parser.read()
-            self.assertTrue(
-                parser.can_fetch("*", "http://www.python.org/robots.txt"))
+        with support.transient_internet(cls.base_url):
+            cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt)
+            cls.parser.read()
+
+    def url(self, path):
+        return '{}{}{}'.format(
+            self.base_url, path, '/' if not os.path.splitext(path)[1] else ''
+        )
+
+    def test_basic(self):
+        self.assertFalse(self.parser.disallow_all)
+        self.assertFalse(self.parser.allow_all)
+        self.assertGreater(self.parser.mtime(), 0)
+        self.assertFalse(self.parser.crawl_delay('*'))
+        self.assertFalse(self.parser.request_rate('*'))
+
+    def test_can_fetch(self):
+        self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere')))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.base_url))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian')))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats')))
+        self.assertFalse(self.parser.can_fetch('*', self.url('webstats')))
+        self.assertTrue(self.parser.can_fetch('*', self.base_url))
+
+    def test_read_404(self):
+        parser = urllib.robotparser.RobotFileParser(self.url('i-robot.txt'))
+        parser.read()
+        self.assertTrue(parser.allow_all)
+        self.assertFalse(parser.disallow_all)
+        self.assertEqual(parser.mtime(), 0)
 
 if __name__=='__main__':
     unittest.main()
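Note: the rewritten tests exercise the public urllib.robotparser API directly. As a side illustration (not part of the commit), here is a minimal standalone sketch of that API, using the robots.txt URL taken from the test above; any reachable robots.txt would do.

import urllib.robotparser

# Point the parser at a robots.txt URL and download it.
parser = urllib.robotparser.RobotFileParser(
    'http://www.pythontest.net/elsewhere/robots.txt')
parser.read()

# Per-agent fetch permissions, as asserted in test_can_fetch().
print(parser.can_fetch('*', 'http://www.pythontest.net/elsewhere/'))
print(parser.can_fetch('Nutch', 'http://www.pythontest.net/'))

# Metadata checked in test_basic(): time of the last read(), plus the
# Python 3.6+ accessors for Crawl-delay and Request-rate directives
# (both return None when the directive is absent).
print(parser.mtime())
print(parser.crawl_delay('*'))
print(parser.request_rate('*'))

# allow_all / disallow_all flags: test_read_404() checks that a missing
# robots.txt (4xx other than 401/403) sets allow_all; for an existing
# file both remain False.
print(parser.allow_all, parser.disallow_all)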