From 120b4ab2b68aebf96ce0de243eab89a25fc2d282 Mon Sep 17 00:00:00 2001 From: Jakub Kuczys Date: Sat, 15 Oct 2022 16:57:53 +0200 Subject: [PATCH] gh-95731: Fix module docstring extraction in pygettext (#95732) --- Lib/test/test_tools/test_i18n.py | 20 +++++++++++++++++++ ...2-08-05-23-25-59.gh-issue-95731.N2KohU.rst | 1 + Tools/i18n/pygettext.py | 7 ++++--- 3 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2022-08-05-23-25-59.gh-issue-95731.N2KohU.rst diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 7f18edaaa8c..c083a04475e 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -155,6 +155,26 @@ class Test_pygettext(unittest.TestCase): ''')) self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + def test_moduledocstring(self): + for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'): + with self.subTest(doc): + msgids = self.extract_docstrings_from_str(dedent('''\ + %s + ''' % doc)) + self.assertIn('doc', msgids) + + def test_moduledocstring_bytes(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + b"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_moduledocstring_fstring(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + def test_msgid(self): msgids = self.extract_docstrings_from_str( '''_("""doc""" r'str' u"ing")''') diff --git a/Misc/NEWS.d/next/Tools-Demos/2022-08-05-23-25-59.gh-issue-95731.N2KohU.rst b/Misc/NEWS.d/next/Tools-Demos/2022-08-05-23-25-59.gh-issue-95731.N2KohU.rst new file mode 100644 index 00000000000..6b214616c0a --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2022-08-05-23-25-59.gh-issue-95731.N2KohU.rst @@ -0,0 +1 @@ +Fix handling of module docstrings in :file:`Tools/i18n/pygettext.py`. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 6f889adffe6..7ada79105db 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -335,9 +335,10 @@ class TokenEater: if ttype == tokenize.STRING and is_literal_string(tstring): self.__addentry(safe_eval(tstring), lineno, isdocstring=1) self.__freshmodule = 0 - elif ttype not in (tokenize.COMMENT, tokenize.NL): - self.__freshmodule = 0 - return + return + if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING): + return + self.__freshmodule = 0 # class or func/method docstring? if ttype == tokenize.NAME and tstring in ('class', 'def'): self.__state = self.__suiteseen