Patch by Lars Marius Garshol:
- Handle <? processing instructions >.
- Allow . and - in entity names.
Also fixed an oversight in the previous fix (in one place, [ \t\r\n] was used instead of string.whitespace).
This commit is contained in:
parent
ae621ff7b7
commit
1ad00717fb
|
@ -20,12 +20,14 @@ incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
|
||||||
'/([a-zA-Z][^<>]*)?|'
|
'/([a-zA-Z][^<>]*)?|'
|
||||||
'![^<>]*)?')
|
'![^<>]*)?')
|
||||||
|
|
||||||
entityref = re.compile('&([a-zA-Z][a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
||||||
charref = re.compile('&#([0-9]+)[^0-9]')
|
charref = re.compile('&#([0-9]+)[^0-9]')
|
||||||
|
|
||||||
starttagopen = re.compile('<[>a-zA-Z]')
|
starttagopen = re.compile('<[>a-zA-Z]')
|
||||||
shorttagopen = re.compile('<[a-zA-Z][a-zA-Z0-9]*/')
|
shorttagopen = re.compile('<[a-zA-Z][a-zA-Z0-9]*/')
|
||||||
shorttag = re.compile('<([a-zA-Z][a-zA-Z0-9]*)/([^/]*)/')
|
shorttag = re.compile('<([a-zA-Z][a-zA-Z0-9]*)/([^/]*)/')
|
||||||
|
# Opening delimiter of a processing instruction: a literal '<?'.
# Raw string so that the backslash reaches the regex engine as written.
piopen = re.compile(r'<\?')
# Closing delimiter of a processing instruction: the first '>' found.
piclose = re.compile('>')
|
||||||
endtagopen = re.compile('</[<>a-zA-Z]')
|
endtagopen = re.compile('</[<>a-zA-Z]')
|
||||||
endbracket = re.compile('[<>]')
|
endbracket = re.compile('[<>]')
|
||||||
special = re.compile('<![^<>]*>')
|
special = re.compile('<![^<>]*>')
|
||||||
|
@ -33,7 +35,7 @@ commentopen = re.compile('<!--')
|
||||||
commentclose = re.compile('--[%s]*>' % string.whitespace)
|
commentclose = re.compile('--[%s]*>' % string.whitespace)
|
||||||
tagfind = re.compile('[a-zA-Z][a-zA-Z0-9]*')
|
tagfind = re.compile('[a-zA-Z][a-zA-Z0-9]*')
|
||||||
attrfind = re.compile(
|
attrfind = re.compile(
|
||||||
'[ \t\n\r]+([a-zA-Z_][-.a-zA-Z_0-9]*)'
|
'[%s]+([a-zA-Z_][-.a-zA-Z_0-9]*)' % string.whitespace
|
||||||
+ ('([%s]*=[%s]*' % (string.whitespace, string.whitespace))
|
+ ('([%s]*=[%s]*' % (string.whitespace, string.whitespace))
|
||||||
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:+*%?!\(\)_#=~]*))?')
|
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:+*%?!\(\)_#=~]*))?')
|
||||||
|
|
||||||
|
@ -127,6 +129,15 @@ class SGMLParser:
|
||||||
if k < 0: break
|
if k < 0: break
|
||||||
i = i+k
|
i = i+k
|
||||||
continue
|
continue
|
||||||
|
if piopen.match(rawdata, i):
|
||||||
|
if self.literal:
|
||||||
|
self.handle_data(rawdata[i])
|
||||||
|
i = i+1
|
||||||
|
continue
|
||||||
|
k = self.parse_pi(i)
|
||||||
|
if k < 0: break
|
||||||
|
i = i+k
|
||||||
|
continue
|
||||||
match = special.match(rawdata, i)
|
match = special.match(rawdata, i)
|
||||||
if match:
|
if match:
|
||||||
if self.literal:
|
if self.literal:
|
||||||
|
@ -184,6 +195,19 @@ class SGMLParser:
|
||||||
j = match.end(0)
|
j = match.end(0)
|
||||||
return j-i
|
return j-i
|
||||||
|
|
||||||
|
# Internal -- parse processing instruction, return length or -1 if not terminated
def parse_pi(self, i):
    """Parse the processing instruction that starts at rawdata[i].

    Expects rawdata[i:i+2] to be '<?'.  The text between '<?' and the
    first piclose match after it is passed to self.handle_pi().

    Returns the number of characters consumed, counted from i through
    the end of the piclose match, or -1 if no piclose match is found in
    self.rawdata after the opening '<?'.

    Raises RuntimeError if rawdata[i:i+2] is not '<?'.
    """
    rawdata = self.rawdata
    if rawdata[i:i+2] != '<?':
        # Name this method in the message; it previously said handle_pi.
        raise RuntimeError('unexpected call to parse_pi')
    match = piclose.search(rawdata, i+2)
    if not match:
        return -1
    # Hand over only the instruction text, without '<?' and the closer.
    self.handle_pi(rawdata[i+2:match.start(0)])
    return match.end(0) - i
|
||||||
|
|
||||||
# Internal -- handle starttag, return length or -1 if not terminated
|
# Internal -- handle starttag, return length or -1 if not terminated
|
||||||
def parse_starttag(self, i):
|
def parse_starttag(self, i):
|
||||||
rawdata = self.rawdata
|
rawdata = self.rawdata
|
||||||
|
@ -348,6 +372,10 @@ class SGMLParser:
|
||||||
def handle_comment(self, data):
|
def handle_comment(self, data):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Example -- handle processing instruction, could be overridden
def handle_pi(self, data):
    """Hook called with the text of a processing instruction.

    This default implementation ignores data and returns None;
    subclasses may override it.
    """
    return None
|
||||||
|
|
||||||
# To be overridden -- handlers for unknown objects
|
# To be overridden -- handlers for unknown objects
|
||||||
def unknown_starttag(self, tag, attrs): pass
|
def unknown_starttag(self, tag, attrs): pass
|
||||||
def unknown_endtag(self, tag): pass
|
def unknown_endtag(self, tag): pass
|
||||||
|
|
Loading…
Reference in New Issue