Jeffrey's latest -- reordered my changes somewhat,
removed some of his own cruft. Added \g<...> references in replacement text.
This commit is contained in:
parent
c12da6980f
commit
71fa97c60d
166
Lib/re.py
166
Lib/re.py
|
@ -77,14 +77,37 @@ def split(pattern, string, maxsplit=0):
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
|
||||||
|
def _expand(m, repl):
|
||||||
|
results = []
|
||||||
|
index = 0
|
||||||
|
size = len(repl)
|
||||||
|
while index < size:
|
||||||
|
found = string.find(repl, '\\', index)
|
||||||
|
if found < 0:
|
||||||
|
results.append(repl[index:])
|
||||||
|
break
|
||||||
|
if found > index:
|
||||||
|
results.append(repl[index:found])
|
||||||
|
escape_type, value, index = expand_escape(repl, found+1, REPLACEMENT)
|
||||||
|
if escape_type == CHAR:
|
||||||
|
results.append(value)
|
||||||
|
elif escape_type == MEMORY_REFERENCE:
|
||||||
|
r = m.group(value)
|
||||||
|
if r is None:
|
||||||
|
raise error, ('group "' + str(value) + '" did not contribute '
|
||||||
|
'to the match')
|
||||||
|
results.append(m.group(value))
|
||||||
|
else:
|
||||||
|
raise error, "bad escape in replacement"
|
||||||
|
return string.join(results, '')
|
||||||
|
|
||||||
class RegexObject:
|
class RegexObject:
|
||||||
def __init__(self, pattern, flags, code, num_regs, groupindex, callouts):
|
def __init__(self, pattern, flags, code, num_regs, groupindex):
|
||||||
self.code = code
|
self.code = code
|
||||||
self.num_regs = num_regs
|
self.num_regs = num_regs
|
||||||
self.flags = flags
|
self.flags = flags
|
||||||
self.pattern = pattern
|
self.pattern = pattern
|
||||||
self.groupindex = groupindex
|
self.groupindex = groupindex
|
||||||
self.callouts = callouts
|
|
||||||
self.fastmap = build_fastmap(code)
|
self.fastmap = build_fastmap(code)
|
||||||
|
|
||||||
if code[0].name == 'bol':
|
if code[0].name == 'bol':
|
||||||
|
@ -132,44 +155,52 @@ class RegexObject:
|
||||||
regs)
|
regs)
|
||||||
|
|
||||||
def sub(self, repl, string, count=0):
|
def sub(self, repl, string, count=0):
|
||||||
return self.subn(repl, string, count)[0]
|
return self.subn(repl, string, count)[0]
|
||||||
|
|
||||||
def subn(self, repl, source, count=0):
|
def subn(self, repl, source, count=0):
|
||||||
if count < 0: raise error, "negative substibution count"
|
if count < 0:
|
||||||
if count == 0: import sys; count = sys.maxint
|
raise ValueError, "negative substibution count"
|
||||||
|
if count == 0:
|
||||||
|
import sys
|
||||||
|
count = sys.maxint
|
||||||
if type(repl) == type(''):
|
if type(repl) == type(''):
|
||||||
if '\\' in repl:
|
if '\\' in repl:
|
||||||
repl = lambda m, r=repl: _expand(m, r)
|
repl = lambda m, r=repl: _expand(m, r)
|
||||||
else:
|
else:
|
||||||
repl = lambda m, r=repl: r
|
repl = lambda m, r=repl: r
|
||||||
n = 0 # Number of matches
|
n = 0 # Number of matches
|
||||||
pos = 0 # Where to start searching
|
pos = 0 # Where to start searching
|
||||||
lastmatch = -1 # End of last match
|
lastmatch = -1 # End of last match
|
||||||
results = [] # Substrings making up the result
|
results = [] # Substrings making up the result
|
||||||
end = len(source)
|
end = len(source)
|
||||||
while n < count and pos <= end:
|
while n < count and pos <= end:
|
||||||
m = self.search(source, pos)
|
m = self.search(source, pos)
|
||||||
if not m: break
|
if not m:
|
||||||
|
break
|
||||||
i, j = m.span(0)
|
i, j = m.span(0)
|
||||||
if i == j == lastmatch:
|
if i == j == lastmatch:
|
||||||
# Empty match adjacent to previous match
|
# Empty match adjacent to previous match
|
||||||
pos = pos+1
|
pos = pos + 1
|
||||||
results.append(source[lastmatch:pos])
|
results.append(source[lastmatch:pos])
|
||||||
continue
|
continue
|
||||||
if pos < i: results.append(source[pos:i])
|
if pos < i:
|
||||||
|
results.append(source[pos:i])
|
||||||
results.append(repl(m))
|
results.append(repl(m))
|
||||||
pos = lastmatch = j
|
pos = lastmatch = j
|
||||||
if i == j:
|
if i == j:
|
||||||
# Last match was empty; don't try here again
|
# Last match was empty; don't try here again
|
||||||
pos = pos+1
|
pos = pos + 1
|
||||||
results.append(source[lastmatch:pos])
|
results.append(source[lastmatch:pos])
|
||||||
n = n+1
|
n = n + 1
|
||||||
results.append(source[pos:])
|
results.append(source[pos:])
|
||||||
return (string.join(results, ''), n)
|
return (string.join(results, ''), n)
|
||||||
|
|
||||||
def split(self, source, maxsplit=0):
|
def split(self, source, maxsplit=0):
|
||||||
if maxsplit < 0: raise error, "negative split count"
|
if maxsplit < 0:
|
||||||
if maxsplit == 0: import sys; maxsplit = sys.maxint
|
raise error, "negative split count"
|
||||||
|
if maxsplit == 0:
|
||||||
|
import sys
|
||||||
|
maxsplit = sys.maxint
|
||||||
n = 0
|
n = 0
|
||||||
pos = 0
|
pos = 0
|
||||||
lastmatch = 0
|
lastmatch = 0
|
||||||
|
@ -177,11 +208,13 @@ class RegexObject:
|
||||||
end = len(source)
|
end = len(source)
|
||||||
while n < maxsplit:
|
while n < maxsplit:
|
||||||
m = self.search(source, pos)
|
m = self.search(source, pos)
|
||||||
if not m: break
|
if not m:
|
||||||
|
break
|
||||||
i, j = m.span(0)
|
i, j = m.span(0)
|
||||||
if i == j:
|
if i == j:
|
||||||
# Empty match
|
# Empty match
|
||||||
if pos >= end: break
|
if pos >= end:
|
||||||
|
break
|
||||||
pos = pos+1
|
pos = pos+1
|
||||||
continue
|
continue
|
||||||
results.append(source[lastmatch:i])
|
results.append(source[lastmatch:i])
|
||||||
|
@ -192,26 +225,6 @@ class RegexObject:
|
||||||
results.append(source[lastmatch:])
|
results.append(source[lastmatch:])
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _expand(m, repl):
|
|
||||||
results = []
|
|
||||||
index = 0
|
|
||||||
size = len(repl)
|
|
||||||
while index < size:
|
|
||||||
found = string.find(repl, '\\', index)
|
|
||||||
if found < 0:
|
|
||||||
results.append(repl[index:])
|
|
||||||
break
|
|
||||||
if found > index:
|
|
||||||
results.append(repl[index:found])
|
|
||||||
escape_type, value, index = expand_escape(repl, found+1, REPLACEMENT)
|
|
||||||
if escape_type == CHAR:
|
|
||||||
results.append(value)
|
|
||||||
elif escape_type == MEMORY_REFERENCE:
|
|
||||||
results.append(m.group(value))
|
|
||||||
else:
|
|
||||||
raise error, "bad escape in replacement"
|
|
||||||
return string.join(results, '')
|
|
||||||
|
|
||||||
class MatchObject:
|
class MatchObject:
|
||||||
def __init__(self, re, string, pos, regs):
|
def __init__(self, re, string, pos, regs):
|
||||||
self.re = re
|
self.re = re
|
||||||
|
@ -280,16 +293,6 @@ class Instruction:
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '%-15s' % (self.name)
|
return '%-15s' % (self.name)
|
||||||
|
|
||||||
class FunctionCallout(Instruction):
|
|
||||||
name = 'function'
|
|
||||||
def __init__(self, function):
|
|
||||||
self.function = function
|
|
||||||
Instruction.__init__(self, chr(22), 2 + len(self.function))
|
|
||||||
def assemble(self, position, labels):
|
|
||||||
return self.opcode + chr(len(self.function)) + self.function
|
|
||||||
def __repr__(self):
|
|
||||||
return '%-15s %-10s' % (self.name, self.function)
|
|
||||||
|
|
||||||
class End(Instruction):
|
class End(Instruction):
|
||||||
name = 'end'
|
name = 'end'
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -608,11 +611,6 @@ def build_fastmap_aux(code, pos, visited, fastmap):
|
||||||
find_label(code, instruction.label),
|
find_label(code, instruction.label),
|
||||||
visited,
|
visited,
|
||||||
fastmap)
|
fastmap)
|
||||||
elif instruction.name == 'function':
|
|
||||||
for char in map(chr, range(256)):
|
|
||||||
fastmap.add(char)
|
|
||||||
fastmap.can_be_null = 1
|
|
||||||
return
|
|
||||||
|
|
||||||
def build_fastmap(code, pos=0):
|
def build_fastmap(code, pos=0):
|
||||||
visited = [0] * len(code)
|
visited = [0] * len(code)
|
||||||
|
@ -825,10 +823,25 @@ def expand_escape(pattern, index, context=NORMAL):
|
||||||
value = string.atoi(pattern[index])
|
value = string.atoi(pattern[index])
|
||||||
return MEMORY_REFERENCE, value, index + 1
|
return MEMORY_REFERENCE, value, index + 1
|
||||||
|
|
||||||
while (end < len(pattern)) and (pattern[end] in string.digits):
|
elif pattern[index] == 'g':
|
||||||
end = end + 1
|
if context != REPLACEMENT:
|
||||||
value = pattern[index:end]
|
return CHAR, 'g', index + 1
|
||||||
|
|
||||||
|
index = index + 1
|
||||||
|
if index >= len(pattern):
|
||||||
|
raise error, 'unfinished symbolic reference'
|
||||||
|
if pattern[index] != '<':
|
||||||
|
raise error, 'missing < in symbolic reference'
|
||||||
|
|
||||||
|
index = index + 1
|
||||||
|
end = string.find(pattern, '>', index)
|
||||||
|
if end == -1:
|
||||||
|
raise error, 'unfinished symbolic reference'
|
||||||
|
value = pattern[index:end]
|
||||||
|
if not valid_identifier(value):
|
||||||
|
raise error, 'illegal symbolic reference'
|
||||||
|
return MEMORY_REFERENCE, value, end + 1
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return CHAR, pattern[index], index + 1
|
return CHAR, pattern[index], index + 1
|
||||||
|
|
||||||
|
@ -837,7 +850,6 @@ def compile(pattern, flags=0):
|
||||||
label = 0
|
label = 0
|
||||||
register = 1
|
register = 1
|
||||||
groupindex = {}
|
groupindex = {}
|
||||||
callouts = []
|
|
||||||
lastop = ''
|
lastop = ''
|
||||||
|
|
||||||
# look for embedded pattern modifiers at the beginning of the pattern
|
# look for embedded pattern modifiers at the beginning of the pattern
|
||||||
|
@ -989,21 +1001,6 @@ def compile(pattern, flags=0):
|
||||||
index = end + 1
|
index = end + 1
|
||||||
lastop = '(?P=)'
|
lastop = '(?P=)'
|
||||||
|
|
||||||
elif pattern[index] == '!':
|
|
||||||
# function callout
|
|
||||||
if index >= len(pattern):
|
|
||||||
raise error, 'no function callout name'
|
|
||||||
start = index + 1
|
|
||||||
end = string.find(pattern, ')', start)
|
|
||||||
if end == -1:
|
|
||||||
raise error, 'no ) to end function callout name'
|
|
||||||
name = pattern[start:end]
|
|
||||||
if name not in callouts:
|
|
||||||
raise error, ('function callout name not listed '
|
|
||||||
'in callouts dict')
|
|
||||||
stack.append([FunctionCallout(name)])
|
|
||||||
lastop = '(?P!)'
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise error, ('unknown Python extension: ' + \
|
raise error, ('unknown Python extension: ' + \
|
||||||
pattern[index])
|
pattern[index])
|
||||||
|
@ -1490,25 +1487,4 @@ def compile(pattern, flags=0):
|
||||||
code.append(Label(label))
|
code.append(Label(label))
|
||||||
label = label + 1
|
label = label + 1
|
||||||
code.append(End())
|
code.append(End())
|
||||||
return RegexObject(pattern, flags, code, register, groupindex, callouts)
|
return RegexObject(pattern, flags, code, register, groupindex)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
print compile('a(b)*')
|
|
||||||
print compile('a{3}')
|
|
||||||
print compile('(a){2}')
|
|
||||||
print compile('a{2,4}')
|
|
||||||
print compile('a|b')
|
|
||||||
print compile('a(b|c)')
|
|
||||||
print compile('a*')
|
|
||||||
print compile('a+')
|
|
||||||
print compile('a|b|c')
|
|
||||||
print compile('a(b|c)*')
|
|
||||||
print compile('\\n')
|
|
||||||
print compile('a(?# huh huh)b')
|
|
||||||
print compile('[a-c\\w]')
|
|
||||||
print compile('[[]')
|
|
||||||
print compile('[]]')
|
|
||||||
print compile('(<hello>a)')
|
|
||||||
print compile('\Q*\e')
|
|
||||||
print compile('a{0,}')
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue