bpo-32940: IDLE: Simplify StringTranslatePseudoMapping in pyparse (GH-5862)

The new code also runs faster.
This commit is contained in:
Cheryl Sabella 2018-02-28 17:23:58 -05:00 committed by Terry Jan Reedy
parent 45ab51c142
commit f0daa880a4
3 changed files with 30 additions and 87 deletions

View File

@ -8,44 +8,20 @@ import unittest
from idlelib import pyparse from idlelib import pyparse
class StringTranslatePseudoMappingTest(unittest.TestCase): class ParseMapTest(unittest.TestCase):
@classmethod def test_parsemap(self):
def setUpClass(cls): keepwhite = {ord(c): ord(c) for c in ' \t\n\r'}
whitespace_chars = ' \t\n\r' mapping = pyparse.ParseMap(keepwhite)
cls.preserve_dict = {ord(c): ord(c) for c in whitespace_chars} self.assertEqual(mapping[ord('\t')], ord('\t'))
cls.default = ord('x') self.assertEqual(mapping[ord('a')], ord('x'))
cls.mapping = pyparse.StringTranslatePseudoMapping( self.assertEqual(mapping[1000], ord('x'))
cls.preserve_dict, default_value=ord('x'))
@classmethod def test_trans(self):
def tearDownClass(cls): # trans is the production instance of ParseMap, used in _study1
del cls.preserve_dict, cls.default, cls.mapping parser = pyparse.Parser(4, 4)
self.assertEqual('\t a([{b}])b"c\'d\n'.translate(pyparse.trans),
def test__init__(self): 'xxx(((x)))x"x\'x\n')
m = self.mapping
self.assertEqual(m._non_defaults, self.preserve_dict)
self.assertEqual(m._default_value, self.default)
def test__get_item__(self):
self.assertEqual(self.mapping[ord('\t')], ord('\t'))
self.assertEqual(self.mapping[ord('a')], self.default)
def test__len__(self):
self.assertEqual(len(self.mapping), len(self.preserve_dict))
def test__iter__(self):
count = 0
for key, value in self.mapping.items():
self.assertIn(key, self.preserve_dict)
count += 1
self.assertEqual(count, len(self.mapping))
def test_get(self):
self.assertEqual(self.mapping.get(ord('\t')), ord('\t'))
self.assertEqual(self.mapping.get('a'), self.default)
# Default is a parameter, but it isn't used.
self.assertEqual(self.mapping.get('a', default=500), self.default)
class PyParseTest(unittest.TestCase): class PyParseTest(unittest.TestCase):
@ -152,10 +128,6 @@ class PyParseTest(unittest.TestCase):
p.set_lo(44) p.set_lo(44)
self.assertEqual(p.code, code[44:]) self.assertEqual(p.code, code[44:])
def test_tran(self):
self.assertEqual('\t a([{b}])b"c\'d\n'.translate(self.parser._tran),
'xxx(((x)))x"x\'x\n')
def test_study1(self): def test_study1(self):
eq = self.assertEqual eq = self.assertEqual
p = self.parser p = self.parser

View File

@ -1,6 +1,6 @@
"""Define partial Python code Parser used by editor and hyperparser. """Define partial Python code Parser used by editor and hyperparser.
Instances of StringTranslatePseudoMapping are used with str.translate. Instances of ParseMap are used with str.translate.
The following bound search and match functions are defined: The following bound search and match functions are defined:
_synchre - start of popular statement; _synchre - start of popular statement;
@ -10,7 +10,6 @@ _itemre - line that may have bracket structure start;
_closere - line that must be followed by dedent. _closere - line that must be followed by dedent.
_chew_ordinaryre - non-special characters. _chew_ordinaryre - non-special characters.
""" """
from collections.abc import Mapping
import re import re
import sys import sys
@ -101,46 +100,27 @@ _chew_ordinaryre = re.compile(r"""
""", re.VERBOSE).match """, re.VERBOSE).match
class StringTranslatePseudoMapping(Mapping): class ParseMap(dict):
r"""Utility class to be used with str.translate() r"""Dict subclass that maps anything not in dict to 'x'.
This Mapping class wraps a given dict. When a value for a key is This is designed to be used with str.translate in _study1.
requested via __getitem__() or get(), the key is looked up in the Anything not specifically mapped otherwise becomes 'x'.
given dict. If found there, the value from the dict is returned. Example: replace everything except whitespace with 'x'.
Otherwise, the default value given upon initialization is returned.
This allows using str.translate() to make some replacements, and to >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
replace all characters for which no replacement was specified with >>> "a + b\tc\nd".translate(keepwhite)
a given character instead of leaving them as-is.
For example, to replace everything except whitespace with 'x':
>>> whitespace_chars = ' \t\n\r'
>>> preserve_dict = {ord(c): ord(c) for c in whitespace_chars}
>>> mapping = StringTranslatePseudoMapping(preserve_dict, ord('x'))
>>> text = "a + b\tc\nd"
>>> text.translate(mapping)
'x x x\tx\nx' 'x x x\tx\nx'
""" """
def __init__(self, non_defaults, default_value): # Calling this triples access time; see bpo-32940
self._non_defaults = non_defaults def __missing__(self, key):
self._default_value = default_value return 120 # ord('x')
def _get(key, _get=non_defaults.get, _default=default_value):
return _get(key, _default)
self._get = _get
def __getitem__(self, item): # Map all ascii to 120 to avoid __missing__ call, then replace some.
return self._get(item) trans = ParseMap.fromkeys(range(128), 120)
trans.update((ord(c), ord('(')) for c in "({[") # open brackets => '(';
def __len__(self): trans.update((ord(c), ord(')')) for c in ")}]") # close brackets => ')'.
return len(self._non_defaults) trans.update((ord(c), ord(c)) for c in "\"'\\\n#") # Keep these.
def __iter__(self):
return iter(self._non_defaults)
def get(self, key, default=None):
return self._get(key)
class Parser: class Parser:
@ -224,16 +204,6 @@ class Parser:
if lo > 0: if lo > 0:
self.code = self.code[lo:] self.code = self.code[lo:]
# Build a translation table to map uninteresting chars to 'x', open
# brackets to '(', close brackets to ')' while preserving quotes,
# backslashes, newlines and hashes. This is to be passed to
# str.translate() in _study1().
_tran = {}
_tran.update((ord(c), ord('(')) for c in "({[")
_tran.update((ord(c), ord(')')) for c in ")}]")
_tran.update((ord(c), ord(c)) for c in "\"'\\\n#")
_tran = StringTranslatePseudoMapping(_tran, default_value=ord('x'))
def _study1(self): def _study1(self):
"""Find the line numbers of non-continuation lines. """Find the line numbers of non-continuation lines.
@ -250,7 +220,7 @@ class Parser:
# uninteresting characters. This can cut the number of chars # uninteresting characters. This can cut the number of chars
# by a factor of 10-40, and so greatly speed the following loop. # by a factor of 10-40, and so greatly speed the following loop.
code = self.code code = self.code
code = code.translate(self._tran) code = code.translate(trans)
code = code.replace('xxxxxxxx', 'x') code = code.replace('xxxxxxxx', 'x')
code = code.replace('xxxx', 'x') code = code.replace('xxxx', 'x')
code = code.replace('xx', 'x') code = code.replace('xx', 'x')

View File

@ -0,0 +1 @@
Simplify and rename StringTranslatePseudoMapping in pyparse.