cpython/Tools/scripts/abitype.py

201 lines
5.3 KiB
Python
Executable File

#!/usr/bin/env python3
# This script converts a C file to use the PEP 384 type definition API
# Usage: abitype.py < old_code > new_code
import re, sys
############ Simplistic C scanner ##################################
# One alternative per token kind; the name of the group that matched
# becomes the token type.  C block comments may span several lines, so
# the comment alternative uses [\s\S] instead of '.', which never
# matches a newline and would leave multi-line comments unrecognised.
tokenizer = re.compile(
    r"(?P<preproc>#.*\n)"
    r"|(?P<comment>/\*[\s\S]*?\*/)"
    r"|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)"
    r"|(?P<ws>[ \t\n]+)"
    r"|(?P<other>.)",
    re.MULTILINE)
# Each token is a mutable [type, text] pair; the texts, concatenated in
# order, reproduce the input exactly.
tokens = []
source = sys.stdin.read()
pos = 0
while pos != len(source):
    m = tokenizer.match(source, pos)
    tokens.append([m.lastgroup, m.group()])
    pos += len(tokens[-1][1])
    if tokens[-1][0] == 'preproc':
        # continuation lines are considered
        # only in preprocess statements
        # Stop at end of input as well: a backslash-newline on the very
        # last line would otherwise append '' forever.
        while tokens[-1][1].endswith('\\\n') and pos < len(source):
            nl = source.find('\n', pos)
            if nl == -1:
                line = source[pos:]
            else:
                line = source[pos:nl+1]
            tokens[-1][1] += line
            pos += len(line)
###### Replacement of PyTypeObject static instances ##############
# classify each token, giving it a one-letter code:
# S: static
# T: PyTypeObject
# I: ident
# W: whitespace
# =, {, }, ; : themselves
def classify():
    """Return one classification character per entry of ``tokens``.

    The result is a string exactly as long as the module-level ``tokens``
    list, so an offset into it is also a token index.  Codes:

    S  the identifier ``static``
    T  the identifier ``PyTypeObject``
    I  any other identifier
    W  whitespace
    = { } ;  those punctuation characters themselves
    .  everything else (comments, preprocessor lines, other characters)
    """
    keyword_codes = {'PyTypeObject': 'T', 'static': 'S'}
    codes = []
    for kind, text in tokens:
        if kind == 'other' and text in "={};":
            code = text
        elif kind == 'ident':
            code = keyword_codes.get(text, 'I')
        elif kind == 'ws':
            code = 'W'
        else:
            code = '.'
        codes.append(code)
    return ''.join(codes)
# Obtain the name of a PyTypeObject definition and the list of its
# fields, in declaration order, skipping ob_base.
# Comments around each field (typically just the slot names, anyway)
# are dropped, and whether the original type was declared static is
# not recorded.
def get_fields(start, real_end):
    """Extract the name and initializer fields of one PyTypeObject.

    Works on the module-level ``tokens`` list.

    start    -- index of the first token of the definition: either
                ``static`` or ``PyTypeObject``, each expected to be
                followed by exactly one whitespace token.
    real_end -- token index just past the end of the definition; used
                only as a sanity bound while scanning fields.

    Returns a ``(name, fields)`` tuple.  ``fields`` holds the source text
    of every comma-separated initializer that follows
    ``PyVarObject_HEAD_INIT(...)``, in declaration order, with leading
    and trailing whitespace and comments removed.

    Raises Exception if the initializer does not start with
    PyVarObject_HEAD_INIT.
    """
    pos = start
    # static?  (skip the keyword and the whitespace after it)
    if tokens[pos][1] == 'static':
        pos += 2
    # PyTypeObject  (again keyword plus whitespace)
    pos += 2
    # name
    name = tokens[pos][1]
    pos += 1
    # move to just past the opening brace of the initializer
    while tokens[pos][1] != '{':
        pos += 1
    pos += 1
    # PyVarObject_HEAD_INIT
    while tokens[pos][0] in ('ws', 'comment'):
        pos += 1
    if tokens[pos][1] != 'PyVarObject_HEAD_INIT':
        raise Exception('%s has no PyVarObject_HEAD_INIT' % name)
    # skip up to and including the first closing parenthesis
    while tokens[pos][1] != ')':
        pos += 1
    pos += 1
    # field definitions: various tokens, comma-separated
    fields = []
    while True:
        while tokens[pos][0] in ('ws', 'comment'):
            pos += 1
        end = pos
        while tokens[end][1] not in ',}':
            if tokens[end][1] == '(':
                # jump over a parenthesised group so that commas inside
                # it (casts, macro arguments) do not split the field
                nesting = 1
                while nesting:
                    end += 1
                    if tokens[end][1] == '(':
                        nesting += 1
                    if tokens[end][1] == ')':
                        nesting -= 1
            end += 1
        assert end < real_end
        # join field, excluding separator and trailing ws
        end1 = end - 1
        while tokens[end1][0] in ('ws', 'comment'):
            end1 -= 1
        field = ''.join(t[1] for t in tokens[pos:end1+1])
        at_close = tokens[end][1] == '}'
        # A trailing comma before the closing brace leaves nothing
        # between the comma and '}'; that is not a field.
        if field or not at_close:
            fields.append(field)
        if at_close:
            break
        pos = end + 1
    return name, fields
# List of type slots as of Python 3.2, omitting ob_base
# Member names in PyTypeObject declaration order; position i names the
# i-th initializer after the object header.  Every entry needs its own
# trailing comma: adjacent string literals are silently concatenated.
typeslots = [
    'tp_name',
    'tp_basicsize',
    'tp_itemsize',
    'tp_dealloc',
    'tp_print',
    'tp_getattr',
    'tp_setattr',
    'tp_reserved',
    'tp_repr',
    'tp_as_number',
    'tp_as_sequence',
    'tp_as_mapping',
    'tp_hash',
    'tp_call',
    'tp_str',
    'tp_getattro',
    'tp_setattro',
    'tp_as_buffer',
    'tp_flags',
    'tp_doc',
    'tp_traverse',
    'tp_clear',
    'tp_richcompare',
    'tp_weaklistoffset',
    'tp_iter',
    'tp_iternext',
    'tp_methods',
    'tp_members',
    'tp_getset',
    'tp_base',
    'tp_dict',
    'tp_descr_get',
    'tp_descr_set',
    'tp_dictoffset',
    'tp_init',
    'tp_alloc',
    'tp_new',
    'tp_free',
    'tp_is_gc',
    'tp_bases',
    'tp_mro',
    'tp_cache',
    'tp_subclasses',
    'tp_weaklist',
    'tp_del',
    'tp_version_tag',
]
# Generate a PyType_Spec definition
def _is_null_value(val):
    """Return True if the C expression *val* ends in a literal zero token."""
    # The lookbehind rejects a '0' that is merely the tail of an
    # identifier or a longer number ('foo0', '10', '1.0'), while still
    # accepting casts such as '(destructor)0'.
    return re.search(r'(?<![A-Za-z0-9_.])(?:0[xX])?0+$', val) is not None


def make_slots(name, fields):
    """Return C source defining ``<name>_slots`` and ``<name>_spec``.

    name   -- C identifier of the original PyTypeObject variable.
    fields -- initializer texts in PyTypeObject declaration order (as
              returned by get_fields); field i is matched with
              ``typeslots[i]``.

    Fields whose value is a literal zero are omitted.  tp_name,
    tp_basicsize, tp_itemsize and tp_flags become members of the
    PyType_Spec; every other field becomes a ``{Py_<slot>, value}`` entry
    of the PyType_Slot array.

    Raises KeyError if no non-null tp_name field is present.
    """
    res = []
    res.append('static PyType_Slot %s_slots[] = {' % name)
    # defaults for spec: a member that is zero or left out of the static
    # initializer is zero in C, so mirror that instead of failing later
    spec = {'tp_basicsize': '0', 'tp_itemsize': '0', 'tp_flags': '0'}
    for i, val in enumerate(fields):
        if _is_null_value(val):
            continue
        # tp_doc is deliberately not in this tuple: PyType_Spec has no
        # doc member, so the docstring has to be passed as a slot.
        if typeslots[i] in ('tp_name', 'tp_basicsize',
                            'tp_itemsize', 'tp_flags'):
            spec[typeslots[i]] = val
            continue
        res.append(' {Py_%s, %s},' % (typeslots[i], val))
    # the C API requires the slot array to end with a {0, NULL} sentinel
    res.append(' {0, NULL},')
    res.append('};')
    res.append('static PyType_Spec %s_spec = {' % name)
    res.append(' %s,' % spec['tp_name'])
    res.append(' %s,' % spec['tp_basicsize'])
    res.append(' %s,' % spec['tp_itemsize'])
    res.append(' %s,' % spec['tp_flags'])
    res.append(' %s_slots,' % name)
    res.append('};\n')
    return '\n'.join(res)
# Main loop: replace all static PyTypeObjects until
# there are none left.
while True:
    c = classify()
    # [static] PyTypeObject <name> = { ... };
    m = re.search('(SW)?TWIW?=W?{.*?};', c)
    if not m:
        break
    # classify() yields one character per token, so offsets into the
    # classification string are also indices into tokens.
    start = m.start()
    end = m.end()
    # get_fields compares real_end against token indices, so it must be
    # given the integer end offset, not the match object.
    name, fields = get_fields(start, end)
    # The replacement token has an empty type; classify() maps it to
    # '.', so the same definition is never matched again.
    tokens[start:end] = [('', make_slots(name, fields))]
# Output result to stdout
for t, v in tokens:
    sys.stdout.write(v)