RE: Pre-split the list of opcode names (GH-91859)

1. It makes them interned.
2. It allows adding comments to individual opcodes.
This commit is contained in:
Serhiy Storchaka 2022-04-23 18:49:23 +03:00 committed by GitHub
parent 92c1037afc
commit 28890427c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 64 additions and 63 deletions

View File

@ -64,88 +64,89 @@ class _NamedIntConstant(int):
MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT') MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')
def _makecodes(names): def _makecodes(*names):
names = names.strip().split()
items = [_NamedIntConstant(i, name) for i, name in enumerate(names)] items = [_NamedIntConstant(i, name) for i, name in enumerate(names)]
globals().update({item.name: item for item in items}) globals().update({item.name: item for item in items})
return items return items
# operators # operators
# failure=0 success=1 (just because it looks better that way :-) OPCODES = _makecodes(
OPCODES = _makecodes(""" # failure=0 success=1 (just because it looks better that way :-)
FAILURE SUCCESS 'FAILURE', 'SUCCESS',
ANY ANY_ALL 'ANY', 'ANY_ALL',
ASSERT ASSERT_NOT 'ASSERT', 'ASSERT_NOT',
AT 'AT',
BRANCH 'BRANCH',
CALL 'CALL',
CATEGORY 'CATEGORY',
CHARSET BIGCHARSET 'CHARSET', 'BIGCHARSET',
GROUPREF GROUPREF_EXISTS 'GROUPREF', 'GROUPREF_EXISTS',
IN 'IN',
INFO 'INFO',
JUMP 'JUMP',
LITERAL 'LITERAL',
MARK 'MARK',
MAX_UNTIL 'MAX_UNTIL',
MIN_UNTIL 'MIN_UNTIL',
NOT_LITERAL 'NOT_LITERAL',
NEGATE 'NEGATE',
RANGE 'RANGE',
REPEAT 'REPEAT',
REPEAT_ONE 'REPEAT_ONE',
SUBPATTERN 'SUBPATTERN',
MIN_REPEAT_ONE 'MIN_REPEAT_ONE',
ATOMIC_GROUP 'ATOMIC_GROUP',
POSSESSIVE_REPEAT 'POSSESSIVE_REPEAT',
POSSESSIVE_REPEAT_ONE 'POSSESSIVE_REPEAT_ONE',
GROUPREF_IGNORE 'GROUPREF_IGNORE',
IN_IGNORE 'IN_IGNORE',
LITERAL_IGNORE 'LITERAL_IGNORE',
NOT_LITERAL_IGNORE 'NOT_LITERAL_IGNORE',
GROUPREF_LOC_IGNORE 'GROUPREF_LOC_IGNORE',
IN_LOC_IGNORE 'IN_LOC_IGNORE',
LITERAL_LOC_IGNORE 'LITERAL_LOC_IGNORE',
NOT_LITERAL_LOC_IGNORE 'NOT_LITERAL_LOC_IGNORE',
GROUPREF_UNI_IGNORE 'GROUPREF_UNI_IGNORE',
IN_UNI_IGNORE 'IN_UNI_IGNORE',
LITERAL_UNI_IGNORE 'LITERAL_UNI_IGNORE',
NOT_LITERAL_UNI_IGNORE 'NOT_LITERAL_UNI_IGNORE',
RANGE_UNI_IGNORE 'RANGE_UNI_IGNORE',
MIN_REPEAT MAX_REPEAT # The following opcodes occur only in the parser output,
""") # but not in the compiled code.
'MIN_REPEAT', 'MAX_REPEAT',
)
del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT
# positions # positions
ATCODES = _makecodes(""" ATCODES = _makecodes(
AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING 'AT_BEGINNING', 'AT_BEGINNING_LINE', 'AT_BEGINNING_STRING',
AT_BOUNDARY AT_NON_BOUNDARY 'AT_BOUNDARY', 'AT_NON_BOUNDARY',
AT_END AT_END_LINE AT_END_STRING 'AT_END', 'AT_END_LINE', 'AT_END_STRING',
AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY 'AT_LOC_BOUNDARY', 'AT_LOC_NON_BOUNDARY',
AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY 'AT_UNI_BOUNDARY', 'AT_UNI_NON_BOUNDARY',
""") )
# categories # categories
CHCODES = _makecodes(""" CHCODES = _makecodes(
CATEGORY_DIGIT CATEGORY_NOT_DIGIT 'CATEGORY_DIGIT', 'CATEGORY_NOT_DIGIT',
CATEGORY_SPACE CATEGORY_NOT_SPACE 'CATEGORY_SPACE', 'CATEGORY_NOT_SPACE',
CATEGORY_WORD CATEGORY_NOT_WORD 'CATEGORY_WORD', 'CATEGORY_NOT_WORD',
CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK 'CATEGORY_LINEBREAK', 'CATEGORY_NOT_LINEBREAK',
CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD 'CATEGORY_LOC_WORD', 'CATEGORY_LOC_NOT_WORD',
CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT 'CATEGORY_UNI_DIGIT', 'CATEGORY_UNI_NOT_DIGIT',
CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE 'CATEGORY_UNI_SPACE', 'CATEGORY_UNI_NOT_SPACE',
CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD 'CATEGORY_UNI_WORD', 'CATEGORY_UNI_NOT_WORD',
CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK 'CATEGORY_UNI_LINEBREAK', 'CATEGORY_UNI_NOT_LINEBREAK',
""") )
# replacement operations for "ignore case" mode # replacement operations for "ignore case" mode