From b46696c0ed640992b4524aab888a26a56d993142 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Thu, 29 Jun 2000 18:50:59 +0000 Subject: [PATCH] [Old patch that hadn't been checked in.] get_starttag_text(): New method. Return the text of the most recently parsed start tag, from the '<' to the '>' or '/'. Not really useful for structure processing, but requested for Web-related use. May also be useful for being able to re-generate the input from the parse events, but there's no equivalent for end tags. attrfind: Be a little more forgiving of unquoted attribute values. --- Lib/sgmllib.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py index 8be7d55bf6c..d7e83195236 100644 --- a/Lib/sgmllib.py +++ b/Lib/sgmllib.py @@ -37,7 +37,7 @@ tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9]*') attrfind = re.compile( '[%s]*([a-zA-Z_][-.a-zA-Z_0-9]*)' % string.whitespace + ('([%s]*=[%s]*' % (string.whitespace, string.whitespace)) - + r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:+*%?!\(\)_#=~]*))?') + + r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:+*%?!&$\(\)_#=~]*))?') # SGML parser base class -- find tags and call handler functions. @@ -207,9 +207,15 @@ class SGMLParser: self.handle_pi(rawdata[i+2: j]) j = match.end(0) return j-i + + __starttag_text = None + def get_starttag_text(self): + return self.__starttag_text # Internal -- handle starttag, return length or -1 if not terminated def parse_starttag(self, i): + self.__starttag_text = None + start_pos = i rawdata = self.rawdata if shorttagopen.match(rawdata, i): # SGML shorthand: data @@ -220,9 +226,11 @@ class SGMLParser: if not match: return -1 tag, data = match.group(1, 2) + self.__starttag_text = '<%s/' % tag tag = string.lower(tag) - self.finish_shorttag(tag, data) k = match.end(0) + self.finish_shorttag(tag, data) + self.__starttag_text = rawdata[start_pos:match.end(1) + 1] return k # XXX The following should skip matching quotes (' or ") match = endbracket.search(rawdata, i+1) @@ -255,6 +263,7 @@ class SGMLParser: k = match.end(0) if rawdata[j] == '>': j = j+1 + self.__starttag_text = rawdata[start_pos:j] self.finish_starttag(tag, attrs) return j