mirror of https://github.com/python/cpython
Merged revisions 83449 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/release27-maint
................
r83449 | georg.brandl | 2010-08-01 22:59:03 +0200 (So, 01 Aug 2010) | 9 lines
Merged revisions 83238 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r83238 | georg.brandl | 2010-07-29 19:55:01 +0200 (Do, 29 Jul 2010) | 1 line
#4108: the first default entry (User-agent: *) wins.
........
................
This commit is contained in:
parent
86edb14057
commit
0ba1f01adf
|
@ -68,7 +68,9 @@ class RobotFileParser:
|
||||||
def _add_entry(self, entry):
|
def _add_entry(self, entry):
|
||||||
if "*" in entry.useragents:
|
if "*" in entry.useragents:
|
||||||
# the default entry is considered last
|
# the default entry is considered last
|
||||||
self.default_entry = entry
|
if self.default_entry is None:
|
||||||
|
# the first default entry wins
|
||||||
|
self.default_entry = entry
|
||||||
else:
|
else:
|
||||||
self.entries.append(entry)
|
self.entries.append(entry)
|
||||||
|
|
||||||
|
@ -120,7 +122,7 @@ class RobotFileParser:
|
||||||
entry.rulelines.append(RuleLine(line[1], True))
|
entry.rulelines.append(RuleLine(line[1], True))
|
||||||
state = 2
|
state = 2
|
||||||
if state == 2:
|
if state == 2:
|
||||||
self.entries.append(entry)
|
self._add_entry(entry)
|
||||||
|
|
||||||
|
|
||||||
def can_fetch(self, useragent, url):
|
def can_fetch(self, useragent, url):
|
||||||
|
|
|
@ -202,6 +202,20 @@ bad = ['/folder1/anotherfile.html']
|
||||||
RobotTest(13, doc, good, bad, agent="googlebot")
|
RobotTest(13, doc, good, bad, agent="googlebot")
|
||||||
|
|
||||||
|
|
||||||
|
# 14. For issue #4108 (obey first * entry)
|
||||||
|
doc = """
|
||||||
|
User-agent: *
|
||||||
|
Disallow: /some/path
|
||||||
|
|
||||||
|
User-agent: *
|
||||||
|
Disallow: /another/path
|
||||||
|
"""
|
||||||
|
|
||||||
|
good = ['/another/path']
|
||||||
|
bad = ['/some/path']
|
||||||
|
|
||||||
|
RobotTest(14, doc, good, bad)
|
||||||
|
|
||||||
|
|
||||||
class TestCase(unittest.TestCase):
|
class TestCase(unittest.TestCase):
|
||||||
def runTest(self):
|
def runTest(self):
|
||||||
|
|
|
@ -35,6 +35,9 @@ Core and Builtins
|
||||||
when turned into an exception: in this case the exception simply
|
when turned into an exception: in this case the exception simply
|
||||||
gets ignored.
|
gets ignored.
|
||||||
|
|
||||||
|
- Issue #4108: In urllib.robotparser, if there are multiple 'User-agent: *'
|
||||||
|
entries, consider the first one.
|
||||||
|
|
||||||
- Issue #9354: Provide getsockopt() in asyncore's file_wrapper.
|
- Issue #9354: Provide getsockopt() in asyncore's file_wrapper.
|
||||||
|
|
||||||
- In the unicode/str.format(), raise a ValueError when indexes to arguments are
|
- In the unicode/str.format(), raise a ValueError when indexes to arguments are
|
||||||
|
|
Loading…
Reference in New Issue