diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 56a27342989..8b2b90d6142 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -3,7 +3,7 @@
 import os
 import sys
 import unittest
-import textwrap
+from textwrap import dedent
 
 from test.support.script_helper import assert_python_ok
 from test.test_tools import skip_if_missing, toolsdir
@@ -109,9 +109,68 @@ class Test_pygettext(unittest.TestCase):
         # This will raise if the date format does not exactly match.
         datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
 
+    def test_funcdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                def foo(bar):
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_funcdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_funcdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                class C:
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_classdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid(self):
+        msgids = self.extract_docstrings_from_str(
+                '''_("""doc""" r'str' u"ing")''')
+        self.assertIn('docstring', msgids)
+
+    def test_msgid_bytes(self):
+        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid_fstring(self):
+        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
     def test_funcdocstring_annotated_args(self):
         """ Test docstrings for functions with annotated args """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar: str):
             """doc"""
         '''))
@@ -119,7 +178,7 @@ class Test_pygettext(unittest.TestCase):
 
     def test_funcdocstring_annotated_return(self):
         """ Test docstrings for functions with annotated return type """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar) -> str:
             """doc"""
         '''))
@@ -127,7 +186,7 @@ class Test_pygettext(unittest.TestCase):
 
     def test_funcdocstring_defvalue_args(self):
         """ Test docstring for functions with default arg values """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar=()):
             """doc"""
         '''))
@@ -137,7 +196,7 @@ class Test_pygettext(unittest.TestCase):
         """ Test docstring extraction for multiple functions combining
         annotated args, annotated return types and default arg values
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo1(bar: tuple=()) -> str:
             """doc1"""
 
@@ -155,7 +214,7 @@ class Test_pygettext(unittest.TestCase):
         """ Test docstring extraction for a class with colons occuring within
         the parentheses.
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
             """doc"""
         '''))
diff --git a/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst b/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst
new file mode 100644
index 00000000000..4d4137240e6
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst
@@ -0,0 +1,2 @@
+:program:`pygettext.py` now recognizes only literal strings as docstrings
+and translatable strings, and rejects bytes literals and f-string expressions.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 13d7a649aec..b46dd339736 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
+def is_literal_string(s):
+    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+
+
 def safe_eval(s):
     # unwrap quotes, safely
     return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ class TokenEater:
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-##              'tstring:', tstring
+##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+##              file=sys.stderr)
         self.__state(ttype, tstring, stup[0])
 
     def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ class TokenEater:
         if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
             # module docstring?
             if self.__freshmodule:
-                if ttype == tokenize.STRING:
+                if ttype == tokenize.STRING and is_literal_string(tstring):
                     self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                     self.__freshmodule = 0
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ class TokenEater:
 
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING:
+        if ttype == tokenize.STRING and is_literal_string(tstring):
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ class TokenEater:
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING:
+        elif ttype == tokenize.STRING and is_literal_string(tstring):
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]: