gh-126807: pygettext: Do not attempt to extract messages from function definitions. (GH-126808)

Fixes a bug where pygettext would attempt
to extract a message from code like this:

def _(x): pass

This happens because pygettext only looks at
one token at a time, so '_(x)' looks like a
function call.

Because 'x' is not a string literal,
pygettext would then erroneously issue a warning.
This commit is contained in:
Tomas R. 2024-11-14 23:17:42 +01:00 committed by GitHub
parent cae9d9d20f
commit 9a456383be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 36 additions and 5 deletions

View File

@@ -87,17 +87,23 @@ class Test_pygettext(unittest.TestCase):
self.maxDiff = None self.maxDiff = None
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
def extract_docstrings_from_str(self, module_content): def extract_from_str(self, module_content, *, args=(), strict=True):
""" utility: return all msgids extracted from module_content """ """Return all msgids extracted from module_content."""
filename = 'test_docstrings.py' filename = 'test.py'
with temp_cwd(None) as cwd: with temp_cwd(None):
with open(filename, 'w', encoding='utf-8') as fp: with open(filename, 'w', encoding='utf-8') as fp:
fp.write(module_content) fp.write(module_content)
assert_python_ok('-Xutf8', self.script, '-D', filename) res = assert_python_ok('-Xutf8', self.script, *args, filename)
if strict:
self.assertEqual(res.err, b'')
with open('messages.pot', encoding='utf-8') as fp: with open('messages.pot', encoding='utf-8') as fp:
data = fp.read() data = fp.read()
return self.get_msgids(data) return self.get_msgids(data)
def extract_docstrings_from_str(self, module_content):
"""Return all docstrings extracted from module_content."""
return self.extract_from_str(module_content, args=('--docstrings',), strict=False)
def test_header(self): def test_header(self):
"""Make sure the required fields are in the header, according to: """Make sure the required fields are in the header, according to:
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
@@ -344,6 +350,23 @@ class Test_pygettext(unittest.TestCase):
self.assertNotIn('foo', msgids) self.assertNotIn('foo', msgids)
self.assertIn('bar', msgids) self.assertIn('bar', msgids)
def test_function_and_class_names(self):
"""Test that function and class names are not mistakenly extracted."""
msgids = self.extract_from_str(dedent('''\
def _(x):
pass
def _(x="foo"):
pass
async def _(x):
pass
class _(object):
pass
'''))
self.assertEqual(msgids, [''])
def test_pygettext_output(self): def test_pygettext_output(self):
"""Test that the pygettext output exactly matches snapshots.""" """Test that the pygettext output exactly matches snapshots."""
for input_file in DATA_DIR.glob('*.py'): for input_file in DATA_DIR.glob('*.py'):

View File

@@ -0,0 +1,2 @@
Fix extraction warnings in :program:`pygettext.py` caused by mistaking
function definitions for function calls.

View File

@@ -341,6 +341,9 @@ class TokenEater:
if ttype == tokenize.NAME and tstring in ('class', 'def'): if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen self.__state = self.__suiteseen
return return
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__ignorenext
return
if ttype == tokenize.NAME and tstring in opts.keywords: if ttype == tokenize.NAME and tstring in opts.keywords:
self.__state = self.__keywordseen self.__state = self.__keywordseen
return return
@@ -448,6 +451,9 @@ class TokenEater:
}, file=sys.stderr) }, file=sys.stderr)
self.__state = self.__waiting self.__state = self.__waiting
def __ignorenext(self, ttype, tstring, lineno):
self.__state = self.__waiting
def __addentry(self, msg, lineno=None, isdocstring=0): def __addentry(self, msg, lineno=None, isdocstring=0):
if lineno is None: if lineno is None:
lineno = self.__lineno lineno = self.__lineno