Deal with macros that have to be replaced with simple text; only a
couple of these are currently found in index data, but these should all be handled in the same way. Closes SF bug #952737.
This commit is contained in:
parent
1c0423a2da
commit
63a0191c8a
|
@ -13,17 +13,27 @@ from xml.sax.saxutils import quoteattr
|
||||||
bang_join = "!".join
|
bang_join = "!".join
|
||||||
null_join = "".join
|
null_join = "".join
|
||||||
|
|
||||||
|
REPLACEMENTS = [
|
||||||
|
# Hackish way to deal with macros replaced with simple text
|
||||||
|
(re.compile(r"\\ABC\b"), "ABC"),
|
||||||
|
(re.compile(r"\\ASCII\b"), "ASCII"),
|
||||||
|
(re.compile(r"\\Cpp\b"), "C++"),
|
||||||
|
(re.compile(r"\\EOF\b"), "EOF"),
|
||||||
|
(re.compile(r"\\NULL\b"), "NULL"),
|
||||||
|
(re.compile(r"\\POSIX\b"), "POSIX"),
|
||||||
|
(re.compile(r"\\UNIX\b"), "Unix"),
|
||||||
|
# deal with turds left over from LaTeX2HTML
|
||||||
|
(re.compile(r"<#\d+#>"), ""),
|
||||||
|
]
|
||||||
|
|
||||||
class Node:
|
class Node:
|
||||||
__rmjunk = re.compile("<#\d+#>")
|
|
||||||
|
|
||||||
continuation = 0
|
continuation = 0
|
||||||
|
|
||||||
def __init__(self, link, str, seqno):
|
def __init__(self, link, str, seqno):
|
||||||
self.links = [link]
|
self.links = [link]
|
||||||
self.seqno = seqno
|
self.seqno = seqno
|
||||||
# remove <#\d+#> left in by moving the data out of LaTeX2HTML
|
for pattern, replacement in REPLACEMENTS:
|
||||||
str = self.__rmjunk.sub('', str)
|
str = pattern.sub(replacement, str)
|
||||||
# build up the text
|
# build up the text
|
||||||
self.text = split_entry_text(str)
|
self.text = split_entry_text(str)
|
||||||
self.key = split_entry_key(str)
|
self.key = split_entry_key(str)
|
||||||
|
|
Loading…
Reference in New Issue