mirror of https://github.com/python/cpython
Process <img> and <frame> tags. Don't bother skipping second href.
This commit is contained in:
parent
de99d310cc
commit
6133ec656e
|
@ -564,11 +564,21 @@ class MyHTMLParser(sgmllib.SGMLParser):
|
||||||
sgmllib.SGMLParser.__init__ (self)
|
sgmllib.SGMLParser.__init__ (self)
|
||||||
|
|
||||||
def start_a(self, attributes):
|
def start_a(self, attributes):
|
||||||
|
self.link_attr(attributes, 'href')
|
||||||
|
|
||||||
|
def end_a(self): pass
|
||||||
|
|
||||||
|
def do_img(self, attributes):
|
||||||
|
self.link_attr(attributes, 'src', 'lowsrc')
|
||||||
|
|
||||||
|
def do_frame(self, attributes):
|
||||||
|
self.link_attr(attributes, 'src')
|
||||||
|
|
||||||
|
def link_attr(self, attributes, *args):
|
||||||
for name, value in attributes:
|
for name, value in attributes:
|
||||||
if name == 'href':
|
if name in args:
|
||||||
if value: value = string.strip(value)
|
if value: value = string.strip(value)
|
||||||
if value: self.links[value] = None
|
if value: self.links[value] = None
|
||||||
return # match only first href
|
|
||||||
|
|
||||||
def do_base(self, attributes):
|
def do_base(self, attributes):
|
||||||
for name, value in attributes:
|
for name, value in attributes:
|
||||||
|
@ -578,7 +588,6 @@ class MyHTMLParser(sgmllib.SGMLParser):
|
||||||
if verbose > 1:
|
if verbose > 1:
|
||||||
print " Base", value
|
print " Base", value
|
||||||
self.base = value
|
self.base = value
|
||||||
return # match only first href
|
|
||||||
|
|
||||||
def getlinks(self):
|
def getlinks(self):
|
||||||
return self.links.keys()
|
return self.links.keys()
|
||||||
|
|
Loading…
Reference in New Issue