gh-104400: Add more tests to pygettext (GH-108173)

This commit is contained in:
Tomas R. 2024-11-03 15:01:09 +01:00 committed by GitHub
parent 556dc9b8a7
commit dcae5cd6ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 363 additions and 21 deletions

View File

@ -0,0 +1,40 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: docstrings.py:7
#, docstring
msgid ""
msgstr ""
#: docstrings.py:18
#, docstring
msgid ""
"multiline\n"
" docstring\n"
" "
msgstr ""
#: docstrings.py:25
#, docstring
msgid "docstring1"
msgstr ""
#: docstrings.py:30
#, docstring
msgid "Hello, {}!"
msgstr ""

View File

@ -0,0 +1,41 @@
# Test docstring extraction
from gettext import gettext as _
# Empty docstring
def test(x):
""""""
# Leading empty line
def test2(x):
"""docstring""" # XXX This should be extracted but isn't.
# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`.
def test3(x):
"""multiline
docstring
"""
# Multiple docstrings - only the first should be extracted
def test4(x):
"""docstring1"""
"""docstring2"""
def test5(x):
"""Hello, {}!""".format("world!") # XXX This should not be extracted.
# Nested docstrings
def test6(x):
def inner(y):
"""nested docstring""" # XXX This should be extracted but isn't.
class Outer:
class Inner:
"nested class docstring" # XXX This should be extracted but isn't.

View File

@ -0,0 +1,35 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: fileloc.py:5 fileloc.py:6
msgid "foo"
msgstr ""
#: fileloc.py:9
msgid "bar"
msgstr ""
#: fileloc.py:14 fileloc.py:18
#, docstring
msgid "docstring"
msgstr ""
#: fileloc.py:22 fileloc.py:26
#, docstring
msgid "baz"
msgstr ""

View File

@ -0,0 +1,26 @@
# Test file locations
from gettext import gettext as _
# Duplicate strings
_('foo')
_('foo')
# Duplicate strings on the same line should only add one location to the output
_('bar'), _('bar')
# Duplicate docstrings
class A:
"""docstring"""
def f():
"""docstring"""
# Duplicate message and docstring
_('baz')
def g():
"""baz"""

View File

@ -0,0 +1,67 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: messages.py:5
msgid ""
msgstr ""
#: messages.py:8 messages.py:9
msgid "parentheses"
msgstr ""
#: messages.py:12
msgid "Hello, world!"
msgstr ""
#: messages.py:15
msgid ""
"Hello,\n"
" multiline!\n"
msgstr ""
#: messages.py:29
msgid "Hello, {}!"
msgstr ""
#: messages.py:33
msgid "1"
msgstr ""
#: messages.py:33
msgid "2"
msgstr ""
#: messages.py:34 messages.py:35
msgid "A"
msgstr ""
#: messages.py:34 messages.py:35
msgid "B"
msgstr ""
#: messages.py:36
msgid "set"
msgstr ""
#: messages.py:42
msgid "nested string"
msgstr ""
#: messages.py:47
msgid "baz"
msgstr ""

View File

@ -0,0 +1,64 @@
# Test message extraction
from gettext import gettext as _
# Empty string
_("")
# Extra parentheses
(_("parentheses"))
((_("parentheses")))
# Multiline strings
_("Hello, "
"world!")
_("""Hello,
multiline!
""")
# Invalid arguments
_()
_(None)
_(1)
_(False)
_(x="kwargs are not allowed")
_("foo", "bar")
_("something", x="something else")
# .format()
_("Hello, {}!").format("world") # valid
_("Hello, {}!".format("world")) # invalid
# Nested structures
_("1"), _("2")
arr = [_("A"), _("B")]
obj = {'a': _("A"), 'b': _("B")}
{{{_('set')}}}
# Nested functions and classes
def test():
_("nested string") # XXX This should be extracted but isn't.
[_("nested string")]
class Foo:
def bar(self):
return _("baz")
def bar(x=_('default value')): # XXX This should be extracted but isn't.
pass
def baz(x=[_('default value')]): # XXX This should be extracted but isn't.
pass
# Shadowing _()
def _(x):
pass
def _(x="don't extract me"):
pass

View File

@ -1,9 +1,11 @@
"""Tests to cover the Tools/i18n package"""
import os
import re
import sys
import unittest
from textwrap import dedent
from pathlib import Path
from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
@ -12,20 +14,47 @@ from test.support.os_helper import temp_cwd, temp_dir
skip_if_missing()
DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'
def normalize_POT_file(pot):
    """Return *pot* with its environment-dependent parts canonicalized.

    Three things are rewritten so that two POT files can be compared as
    plain strings: the ``POT-Creation-Date`` header, the charset named in
    the ``Content-Type`` header, and the path separators used inside
    ``#:`` file-location comments.
    """
    # Applied in order; each entry is (regex, replacement).
    substitutions = (
        # Pin the creation date to a fixed value.
        (r'"POT-Creation-Date: .+?\\n"',
         r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"'),
        # Force the charset to UTF-8 (currently there's no way to specify
        # the output charset).
        (r'"Content-Type: text/plain; charset=.+?\\n"',
         r'"Content-Type: text/plain; charset=UTF-8\\n"'),
        # Use '/' in file locations even when the platform separator
        # differs (e.g. '\' on Windows).
        (r'#:.+', lambda found: found[0].replace(os.sep, "/")),
    )
    for pattern, replacement in substitutions:
        pot = re.sub(pattern, replacement, pot)
    return pot
class Test_pygettext(unittest.TestCase):
"""Tests for the pygettext.py tool"""
script = os.path.join(toolsdir,'i18n', 'pygettext.py')
script = Path(toolsdir, 'i18n', 'pygettext.py')
def get_header(self, data):
"""Utility: return the header of a .po file as a dictionary.

*data* is the text of the file.  Each remaining line is split at its
first ':' into a key and a value; the value is stored with surrounding
whitespace stripped.  A literal ``\\n`` escape at the end of a header
value is not removed.
"""
headers = {}
for line in data.split('\n'):
# Skip blank lines, comments and the msgid/msgstr keyword lines.
if not line or line.startswith(('#', 'msgid','msgstr')):
if not line or line.startswith(('#', 'msgid', 'msgstr')):
continue
# Drop the double quotes that wrap the line.
line = line.strip('"')
# Split only at the first ':' so the value may itself contain colons.
key, val = line.split(':',1)
key, val = line.split(':', 1)
headers[key] = val.strip()
return headers
@ -53,13 +82,18 @@ class Test_pygettext(unittest.TestCase):
return msgids
def assert_POT_equal(self, expected, actual):
    """Assert that two POT files are equal once both are normalized."""
    # Lift the diff size limit so a failure shows the whole difference.
    self.maxDiff = None
    normalized_expected = normalize_POT_file(expected)
    normalized_actual = normalize_POT_file(actual)
    self.assertEqual(normalized_expected, normalized_actual)
def extract_docstrings_from_str(self, module_content):
"""Utility: return all msgids extracted from module_content.

*module_content* is written to ``test_docstrings.py`` in a temporary
working directory, the pygettext script is run on that file with the
``-D`` option, and the resulting ``messages.pot`` is read back and
passed to ``self.get_msgids()``.
"""
filename = 'test_docstrings.py'
with temp_cwd(None) as cwd:
with open(filename, 'w', encoding='utf-8') as fp:
fp.write(module_content)
# Run the script in a child interpreter; it writes messages.pot
# into the current (temporary) directory.
assert_python_ok(self.script, '-D', filename)
assert_python_ok('-Xutf8', self.script, '-D', filename)
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
return self.get_msgids(data)
@ -69,7 +103,7 @@ class Test_pygettext(unittest.TestCase):
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
"""
with temp_cwd(None) as cwd:
assert_python_ok(self.script)
assert_python_ok('-Xutf8', self.script)
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
header = self.get_header(data)
@ -96,7 +130,7 @@ class Test_pygettext(unittest.TestCase):
""" Match the date format from xgettext for POT-Creation-Date """
from datetime import datetime
with temp_cwd(None) as cwd:
assert_python_ok(self.script)
assert_python_ok('-Xutf8', self.script)
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
header = self.get_header(data)
@ -310,6 +344,20 @@ class Test_pygettext(unittest.TestCase):
self.assertNotIn('foo', msgids)
self.assertIn('bar', msgids)
def test_pygettext_output(self):
    """Test that the pygettext output exactly matches snapshots.

    Every ``*.py`` file in ``DATA_DIR`` is copied into a temporary working
    directory and run through pygettext with ``--docstrings``.  The
    generated ``messages.pot`` is then compared, via
    ``assert_POT_equal()``, with the ``.pot`` file of the same stem that
    sits next to the input file.
    """
    for input_file in DATA_DIR.glob('*.py'):
        output_file = input_file.with_suffix('.pot')
        # DATA_DIR is an absolute path, so only the file name is used in
        # the subtest label; interpolating the full path after
        # 'i18n_data/' would produce a malformed label.
        with self.subTest(input_file=f'i18n_data/{input_file.name}'):
            contents = input_file.read_text(encoding='utf-8')
            with temp_cwd(None):
                # Write with the same encoding the file was read with;
                # the default would be the locale encoding, which may not
                # be able to represent the content.
                Path(input_file.name).write_text(contents, encoding='utf-8')
                assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
                output = Path('messages.pot').read_text(encoding='utf-8')
            expected = output_file.read_text(encoding='utf-8')
            self.assert_POT_equal(expected, output)
def test_files_list(self):
"""Make sure the directories are inspected for source files
bpo-31920
@ -318,21 +366,41 @@ class Test_pygettext(unittest.TestCase):
text2 = 'Text to translate2'
text3 = 'Text to ignore'
with temp_cwd(None), temp_dir(None) as sdir:
os.mkdir(os.path.join(sdir, 'pypkg'))
with open(os.path.join(sdir, 'pypkg', 'pymod.py'), 'w',
encoding='utf-8') as sfile:
sfile.write(f'_({text1!r})')
os.mkdir(os.path.join(sdir, 'pkg.py'))
with open(os.path.join(sdir, 'pkg.py', 'pymod2.py'), 'w',
encoding='utf-8') as sfile:
sfile.write(f'_({text2!r})')
os.mkdir(os.path.join(sdir, 'CVS'))
with open(os.path.join(sdir, 'CVS', 'pymod3.py'), 'w',
encoding='utf-8') as sfile:
sfile.write(f'_({text3!r})')
assert_python_ok(self.script, sdir)
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
pymod = Path(sdir, 'pypkg', 'pymod.py')
pymod.parent.mkdir()
pymod.write_text(f'_({text1!r})', encoding='utf-8')
pymod2 = Path(sdir, 'pkg.py', 'pymod2.py')
pymod2.parent.mkdir()
pymod2.write_text(f'_({text2!r})', encoding='utf-8')
pymod3 = Path(sdir, 'CVS', 'pymod3.py')
pymod3.parent.mkdir()
pymod3.write_text(f'_({text3!r})', encoding='utf-8')
assert_python_ok('-Xutf8', self.script, sdir)
data = Path('messages.pot').read_text(encoding='utf-8')
self.assertIn(f'msgid "{text1}"', data)
self.assertIn(f'msgid "{text2}"', data)
self.assertNotIn(text3, data)
def update_POT_snapshots():
    """Regenerate the ``.pot`` snapshot of every ``*.py`` file in DATA_DIR.

    Each input file is copied into a temporary working directory and run
    through pygettext with ``--docstrings``.  The resulting
    ``messages.pot`` is normalized and written next to the input file,
    with the suffix replaced by ``.pot``.
    """
    for source in DATA_DIR.glob('*.py'):
        snapshot = source.with_suffix('.pot')
        raw = source.read_bytes()
        with temp_cwd(None):
            # Work on a copy so pygettext records a bare file name.
            Path(source.name).write_bytes(raw)
            assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', source.name)
            generated = Path('messages.pot').read_text(encoding='utf-8')
        snapshot.write_text(normalize_POT_file(generated), encoding='utf-8')
if __name__ == '__main__':
    # Passing --snapshot-update as the first argument regenerates the POT
    # snapshot files and exits instead of running the tests.
    if sys.argv[1:2] == ['--snapshot-update']:
        update_POT_snapshots()
        sys.exit(0)
    unittest.main()

View File

@ -2545,6 +2545,7 @@ TESTSUBDIRS= idlelib/idle_test \
test/test_tomllib/data/valid/dates-and-times \
test/test_tomllib/data/valid/multiline-basic-str \
test/test_tools \
test/test_tools/i18n_data \
test/test_ttk \
test/test_unittest \
test/test_unittest/namespace_test_pkg \