merge
This commit is contained in:
commit
c3361b9ae1
|
@ -1333,7 +1333,7 @@ successive matches::
|
|||
|
||||
Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column'])
|
||||
|
||||
def tokenize(s):
|
||||
def tokenize(code):
|
||||
keywords = {'IF', 'THEN', 'ENDIF', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
|
||||
token_specification = [
|
||||
('NUMBER', r'\d+(\.\d*)?'), # Integer or decimal number
|
||||
|
@ -1343,26 +1343,27 @@ successive matches::
|
|||
('OP', r'[+\-*/]'), # Arithmetic operators
|
||||
('NEWLINE', r'\n'), # Line endings
|
||||
('SKIP', r'[ \t]+'), # Skip over spaces and tabs
|
||||
('MISMATCH',r'.'), # Any other character
|
||||
]
|
||||
tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
|
||||
get_token = re.compile(tok_regex).match
|
||||
line = 1
|
||||
pos = line_start = 0
|
||||
mo = get_token(s)
|
||||
while mo is not None:
|
||||
typ = mo.lastgroup
|
||||
if typ == 'NEWLINE':
|
||||
line_start = pos
|
||||
line += 1
|
||||
elif typ != 'SKIP':
|
||||
val = mo.group(typ)
|
||||
if typ == 'ID' and val in keywords:
|
||||
typ = val
|
||||
yield Token(typ, val, line, mo.start()-line_start)
|
||||
pos = mo.end()
|
||||
mo = get_token(s, pos)
|
||||
if pos != len(s):
|
||||
raise RuntimeError('Unexpected character %r on line %d' %(s[pos], line))
|
||||
line_num = 1
|
||||
line_start = 0
|
||||
for mo in re.finditer(tok_regex, code):
|
||||
kind = mo.lastgroup
|
||||
value = mo.group(kind)
|
||||
if kind == 'NEWLINE':
|
||||
line_start = mo.end()
|
||||
line_num += 1
|
||||
elif kind == 'SKIP':
|
||||
pass
|
||||
elif kind == 'MISMATCH':
|
||||
raise RuntimeError('%r unexpected on line %d' % (value, line_num))
|
||||
else:
|
||||
if kind == 'ID' and value in keywords:
|
||||
kind = value
|
||||
column = mo.start() - line_start
|
||||
yield Token(kind, value, line_num, column)
|
||||
|
||||
statements = '''
|
||||
IF quantity THEN
|
||||
|
@ -1376,22 +1377,22 @@ successive matches::
|
|||
|
||||
The tokenizer produces the following output::
|
||||
|
||||
Token(typ='IF', value='IF', line=2, column=5)
|
||||
Token(typ='ID', value='quantity', line=2, column=8)
|
||||
Token(typ='THEN', value='THEN', line=2, column=17)
|
||||
Token(typ='ID', value='total', line=3, column=9)
|
||||
Token(typ='ASSIGN', value=':=', line=3, column=15)
|
||||
Token(typ='ID', value='total', line=3, column=18)
|
||||
Token(typ='OP', value='+', line=3, column=24)
|
||||
Token(typ='ID', value='price', line=3, column=26)
|
||||
Token(typ='OP', value='*', line=3, column=32)
|
||||
Token(typ='ID', value='quantity', line=3, column=34)
|
||||
Token(typ='END', value=';', line=3, column=42)
|
||||
Token(typ='ID', value='tax', line=4, column=9)
|
||||
Token(typ='ASSIGN', value=':=', line=4, column=13)
|
||||
Token(typ='ID', value='price', line=4, column=16)
|
||||
Token(typ='OP', value='*', line=4, column=22)
|
||||
Token(typ='NUMBER', value='0.05', line=4, column=24)
|
||||
Token(typ='END', value=';', line=4, column=28)
|
||||
Token(typ='ENDIF', value='ENDIF', line=5, column=5)
|
||||
Token(typ='END', value=';', line=5, column=10)
|
||||
Token(typ='IF', value='IF', line=2, column=4)
|
||||
Token(typ='ID', value='quantity', line=2, column=7)
|
||||
Token(typ='THEN', value='THEN', line=2, column=16)
|
||||
Token(typ='ID', value='total', line=3, column=8)
|
||||
Token(typ='ASSIGN', value=':=', line=3, column=14)
|
||||
Token(typ='ID', value='total', line=3, column=17)
|
||||
Token(typ='OP', value='+', line=3, column=23)
|
||||
Token(typ='ID', value='price', line=3, column=25)
|
||||
Token(typ='OP', value='*', line=3, column=31)
|
||||
Token(typ='ID', value='quantity', line=3, column=33)
|
||||
Token(typ='END', value=';', line=3, column=41)
|
||||
Token(typ='ID', value='tax', line=4, column=8)
|
||||
Token(typ='ASSIGN', value=':=', line=4, column=12)
|
||||
Token(typ='ID', value='price', line=4, column=15)
|
||||
Token(typ='OP', value='*', line=4, column=21)
|
||||
Token(typ='NUMBER', value='0.05', line=4, column=23)
|
||||
Token(typ='END', value=';', line=4, column=27)
|
||||
Token(typ='ENDIF', value='ENDIF', line=5, column=4)
|
||||
Token(typ='END', value=';', line=5, column=9)
|
||||
|
|
Loading…
Reference in New Issue