Jeffrey's latest -- reordered my changes somewhat,

removed some of his own cruft.  Added \g<...> references in replacement text.
This commit is contained in:
Guido van Rossum 1997-07-18 04:26:03 +00:00
parent c12da6980f
commit 71fa97c60d
1 changed files with 71 additions and 95 deletions

166
Lib/re.py
View File

@ -77,14 +77,37 @@ def split(pattern, string, maxsplit=0):
#
#
def _expand(m, repl):
results = []
index = 0
size = len(repl)
while index < size:
found = string.find(repl, '\\', index)
if found < 0:
results.append(repl[index:])
break
if found > index:
results.append(repl[index:found])
escape_type, value, index = expand_escape(repl, found+1, REPLACEMENT)
if escape_type == CHAR:
results.append(value)
elif escape_type == MEMORY_REFERENCE:
r = m.group(value)
if r is None:
raise error, ('group "' + str(value) + '" did not contribute '
'to the match')
results.append(m.group(value))
else:
raise error, "bad escape in replacement"
return string.join(results, '')
class RegexObject:
def __init__(self, pattern, flags, code, num_regs, groupindex, callouts):
def __init__(self, pattern, flags, code, num_regs, groupindex):
self.code = code
self.num_regs = num_regs
self.flags = flags
self.pattern = pattern
self.groupindex = groupindex
self.callouts = callouts
self.fastmap = build_fastmap(code)
if code[0].name == 'bol':
@ -132,44 +155,52 @@ class RegexObject:
regs)
def sub(self, repl, string, count=0):
    """Return `string` with matches of this pattern replaced using `repl`.

    Delegates to subn() with the same arguments and returns only the first
    element of its result, discarding the replacement count.
    """
    return self.subn(repl, string, count)[0]
def subn(self, repl, source, count=0):
if count < 0: raise error, "negative substibution count"
if count == 0: import sys; count = sys.maxint
if count < 0:
raise ValueError, "negative substibution count"
if count == 0:
import sys
count = sys.maxint
if type(repl) == type(''):
if '\\' in repl:
repl = lambda m, r=repl: _expand(m, r)
else:
repl = lambda m, r=repl: r
n = 0 # Number of matches
pos = 0 # Where to start searching
lastmatch = -1 # End of last match
results = [] # Substrings making up the result
n = 0 # Number of matches
pos = 0 # Where to start searching
lastmatch = -1 # End of last match
results = [] # Substrings making up the result
end = len(source)
while n < count and pos <= end:
m = self.search(source, pos)
if not m: break
if not m:
break
i, j = m.span(0)
if i == j == lastmatch:
# Empty match adjacent to previous match
pos = pos+1
pos = pos + 1
results.append(source[lastmatch:pos])
continue
if pos < i: results.append(source[pos:i])
if pos < i:
results.append(source[pos:i])
results.append(repl(m))
pos = lastmatch = j
if i == j:
# Last match was empty; don't try here again
pos = pos+1
pos = pos + 1
results.append(source[lastmatch:pos])
n = n+1
n = n + 1
results.append(source[pos:])
return (string.join(results, ''), n)
def split(self, source, maxsplit=0):
if maxsplit < 0: raise error, "negative split count"
if maxsplit == 0: import sys; maxsplit = sys.maxint
if maxsplit < 0:
raise error, "negative split count"
if maxsplit == 0:
import sys
maxsplit = sys.maxint
n = 0
pos = 0
lastmatch = 0
@ -177,11 +208,13 @@ class RegexObject:
end = len(source)
while n < maxsplit:
m = self.search(source, pos)
if not m: break
if not m:
break
i, j = m.span(0)
if i == j:
# Empty match
if pos >= end: break
if pos >= end:
break
pos = pos+1
continue
results.append(source[lastmatch:i])
@ -192,26 +225,6 @@ class RegexObject:
results.append(source[lastmatch:])
return results
def _expand(m, repl):
results = []
index = 0
size = len(repl)
while index < size:
found = string.find(repl, '\\', index)
if found < 0:
results.append(repl[index:])
break
if found > index:
results.append(repl[index:found])
escape_type, value, index = expand_escape(repl, found+1, REPLACEMENT)
if escape_type == CHAR:
results.append(value)
elif escape_type == MEMORY_REFERENCE:
results.append(m.group(value))
else:
raise error, "bad escape in replacement"
return string.join(results, '')
class MatchObject:
def __init__(self, re, string, pos, regs):
self.re = re
@ -280,16 +293,6 @@ class Instruction:
def __repr__(self):
return '%-15s' % (self.name)
class FunctionCallout(Instruction):
    """Instruction that carries the name of a function callout.

    assemble() emits self.opcode, then a single character holding the
    length of the function name, then the name itself.
    """
    name = 'function'

    def __init__(self, function):
        self.function = function
        # Second argument to Instruction.__init__: name length plus two
        # (presumably the opcode and length characters -- TODO confirm).
        size = 2 + len(function)
        Instruction.__init__(self, chr(22), size)

    def assemble(self, position, labels):
        # position and labels are accepted but not used here.
        header = self.opcode + chr(len(self.function))
        return header + self.function

    def __repr__(self):
        fields = (self.name, self.function)
        return '%-15s %-10s' % fields
class End(Instruction):
name = 'end'
def __init__(self):
@ -608,11 +611,6 @@ def build_fastmap_aux(code, pos, visited, fastmap):
find_label(code, instruction.label),
visited,
fastmap)
elif instruction.name == 'function':
for char in map(chr, range(256)):
fastmap.add(char)
fastmap.can_be_null = 1
return
def build_fastmap(code, pos=0):
visited = [0] * len(code)
@ -825,10 +823,25 @@ def expand_escape(pattern, index, context=NORMAL):
value = string.atoi(pattern[index])
return MEMORY_REFERENCE, value, index + 1
while (end < len(pattern)) and (pattern[end] in string.digits):
end = end + 1
value = pattern[index:end]
elif pattern[index] == 'g':
if context != REPLACEMENT:
return CHAR, 'g', index + 1
index = index + 1
if index >= len(pattern):
raise error, 'unfinished symbolic reference'
if pattern[index] != '<':
raise error, 'missing < in symbolic reference'
index = index + 1
end = string.find(pattern, '>', index)
if end == -1:
raise error, 'unfinished symbolic reference'
value = pattern[index:end]
if not valid_identifier(value):
raise error, 'illegal symbolic reference'
return MEMORY_REFERENCE, value, end + 1
else:
return CHAR, pattern[index], index + 1
@ -837,7 +850,6 @@ def compile(pattern, flags=0):
label = 0
register = 1
groupindex = {}
callouts = []
lastop = ''
# look for embedded pattern modifiers at the beginning of the pattern
@ -989,21 +1001,6 @@ def compile(pattern, flags=0):
index = end + 1
lastop = '(?P=)'
elif pattern[index] == '!':
# function callout
if index >= len(pattern):
raise error, 'no function callout name'
start = index + 1
end = string.find(pattern, ')', start)
if end == -1:
raise error, 'no ) to end function callout name'
name = pattern[start:end]
if name not in callouts:
raise error, ('function callout name not listed '
'in callouts dict')
stack.append([FunctionCallout(name)])
lastop = '(?P!)'
else:
raise error, ('unknown Python extension: ' + \
pattern[index])
@ -1490,25 +1487,4 @@ def compile(pattern, flags=0):
code.append(Label(label))
label = label + 1
code.append(End())
return RegexObject(pattern, flags, code, register, groupindex, callouts)
if __name__ == '__main__':
print compile('a(b)*')
print compile('a{3}')
print compile('(a){2}')
print compile('a{2,4}')
print compile('a|b')
print compile('a(b|c)')
print compile('a*')
print compile('a+')
print compile('a|b|c')
print compile('a(b|c)*')
print compile('\\n')
print compile('a(?# huh huh)b')
print compile('[a-c\\w]')
print compile('[[]')
print compile('[]]')
print compile('(<hello>a)')
print compile('\Q*\e')
print compile('a{0,}')
return RegexObject(pattern, flags, code, register, groupindex)