From f3c65de460de58e5538bd4b9e27c0810d772e240 Mon Sep 17 00:00:00 2001 From: George Yoshida Date: Sun, 28 May 2006 16:39:09 +0000 Subject: [PATCH] Patch #1080727: add "encoding" parameter to doctest.DocFileSuite Contributed by Bjorn Tillenius. --- Doc/lib/libdoctest.tex | 17 ++++++++++-- Lib/doctest.py | 19 ++++++++++++-- Lib/test/test_doctest.py | 54 +++++++++++++++++++++++++++++++++----- Lib/test/test_doctest4.txt | 17 ++++++++++++ Misc/NEWS | 2 ++ 5 files changed, 99 insertions(+), 10 deletions(-) create mode 100644 Lib/test/test_doctest4.txt diff --git a/Doc/lib/libdoctest.tex b/Doc/lib/libdoctest.tex index 73b29ad71cd..f9a97fa6193 100644 --- a/Doc/lib/libdoctest.tex +++ b/Doc/lib/libdoctest.tex @@ -868,7 +868,7 @@ sections \ref{doctest-simple-testmod} and globs}\optional{, verbose}\optional{, report}\optional{, optionflags}\optional{, extraglobs}\optional{, raise_on_error}\optional{, - parser}} + parser}\optional{, encoding}} All arguments except \var{filename} are optional, and should be specified in keyword form. @@ -941,7 +941,13 @@ sections \ref{doctest-simple-testmod} and subclass) that should be used to extract tests from the files. It defaults to a normal parser (i.e., \code{\class{DocTestParser}()}). + Optional argument \var{encoding} specifies an encoding that should + be used to convert the file to unicode. + \versionadded{2.4} + + \versionchanged[The parameter \var{encoding} was added]{2.5} + \end{funcdesc} \begin{funcdesc}{testmod}{\optional{m}\optional{, name}\optional{, @@ -1061,7 +1067,8 @@ instances from text files and modules with doctests: \begin{funcdesc}{DocFileSuite}{\optional{module_relative}\optional{, package}\optional{, setUp}\optional{, tearDown}\optional{, globs}\optional{, - optionflags}\optional{, parser}} + optionflags}\optional{, parser}\optional{, + encoding}} Convert doctest tests from one or more text files to a \class{\refmodule{unittest}.TestSuite}. @@ -1128,11 +1135,17 @@ instances from text files and modules with doctests: subclass) that should be used to extract tests from the files. It defaults to a normal parser (i.e., \code{\class{DocTestParser}()}). + Optional argument \var{encoding} specifies an encoding that should + be used to convert the file to unicode. + \versionadded{2.4} \versionchanged[The global \code{__file__} was added to the globals provided to doctests loaded from a text file using \function{DocFileSuite()}]{2.5} + + \versionchanged[The parameter \var{encoding} was added]{2.5} + \end{funcdesc} \begin{funcdesc}{DocTestSuite}{\optional{module}\optional{, diff --git a/Lib/doctest.py b/Lib/doctest.py index 857bc1a6eb4..971ec6cc4c1 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -1869,7 +1869,8 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None, def testfile(filename, module_relative=True, name=None, package=None, globs=None, verbose=None, report=True, optionflags=0, - extraglobs=None, raise_on_error=False, parser=DocTestParser()): + extraglobs=None, raise_on_error=False, parser=DocTestParser(), + encoding=None): """ Test examples in the given file. Return (#failures, #tests). @@ -1935,6 +1936,9 @@ def testfile(filename, module_relative=True, name=None, package=None, Optional keyword arg "parser" specifies a DocTestParser (or subclass) that should be used to extract tests from the files. + Optional keyword arg "encoding" specifies an encoding that should + be used to convert the file to unicode. + Advanced tomfoolery: testmod runs methods of a local instance of class doctest.Tester, then merges the results into (or creates) global Tester instance doctest.master. Methods of doctest.master @@ -1969,6 +1973,9 @@ def testfile(filename, module_relative=True, name=None, package=None, else: runner = DocTestRunner(verbose=verbose, optionflags=optionflags) + if encoding is not None: + text = text.decode(encoding) + # Read the file, convert it to a test, and run it. test = parser.get_doctest(text, globs, name, filename, 0) runner.run(test) @@ -2339,7 +2346,8 @@ class DocFileCase(DocTestCase): ) def DocFileTest(path, module_relative=True, package=None, - globs=None, parser=DocTestParser(), **options): + globs=None, parser=DocTestParser(), + encoding=None, **options): if globs is None: globs = {} else: @@ -2357,6 +2365,10 @@ def DocFileTest(path, module_relative=True, package=None, # Find the file and read it. name = os.path.basename(path) + + # If an encoding is specified, use it to convert the file to unicode + if encoding is not None: + doc = doc.decode(encoding) # Convert it to a test, and wrap it in a DocFileCase. test = parser.get_doctest(doc, globs, name, path, 0) @@ -2414,6 +2426,9 @@ def DocFileSuite(*paths, **kw): parser A DocTestParser (or subclass) that should be used to extract tests from the files. + + encoding + An encoding that will be used to convert the files to unicode. """ suite = unittest.TestSuite() diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 443c9624987..92d2d74eafd 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -1937,9 +1937,10 @@ def test_DocFileSuite(): >>> import unittest >>> suite = doctest.DocFileSuite('test_doctest.txt', - ... 'test_doctest2.txt') + ... 'test_doctest2.txt', + ... 'test_doctest4.txt') >>> suite.run(unittest.TestResult()) - + The test files are looked for in the directory containing the calling module. A package keyword argument can be provided to @@ -1948,9 +1949,10 @@ def test_DocFileSuite(): >>> import unittest >>> suite = doctest.DocFileSuite('test_doctest.txt', ... 'test_doctest2.txt', + ... 'test_doctest4.txt', ... package='test') >>> suite.run(unittest.TestResult()) - + '/' should be used as a path separator. It will be converted to a native separator at run time: @@ -1995,19 +1997,21 @@ def test_DocFileSuite(): >>> suite = doctest.DocFileSuite('test_doctest.txt', ... 'test_doctest2.txt', + ... 'test_doctest4.txt', ... globs={'favorite_color': 'blue'}) >>> suite.run(unittest.TestResult()) - + In this case, we supplied a missing favorite color. You can provide doctest options: >>> suite = doctest.DocFileSuite('test_doctest.txt', ... 'test_doctest2.txt', + ... 'test_doctest4.txt', ... optionflags=doctest.DONT_ACCEPT_BLANKLINE, ... globs={'favorite_color': 'blue'}) >>> suite.run(unittest.TestResult()) - + And, you can provide setUp and tearDown functions: @@ -2025,9 +2029,10 @@ def test_DocFileSuite(): >>> suite = doctest.DocFileSuite('test_doctest.txt', ... 'test_doctest2.txt', + ... 'test_doctest4.txt', ... setUp=setUp, tearDown=tearDown) >>> suite.run(unittest.TestResult()) - + But the tearDown restores sanity: @@ -2060,6 +2065,17 @@ def test_DocFileSuite(): >>> suite.run(unittest.TestResult()) + If the tests contain non-ASCII characters, we have to specify which + encoding the file is encoded with. We do so by using the `encoding` + parameter: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest2.txt', + ... 'test_doctest4.txt', + ... encoding='utf-8') + >>> suite.run(unittest.TestResult()) + + """ def test_trailing_space_in_test(): @@ -2266,6 +2282,32 @@ debugging): Traceback (most recent call last): UnexpectedException: ... >>> doctest.master = None # Reset master. + +If the tests contain non-ASCII characters, the tests might fail, since +it's unknown which encoding is used. The encoding can be specified +using the optional keyword argument `encoding`: + + >>> doctest.testfile('test_doctest4.txt') # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 7, in test_doctest4.txt + Failed example: + u'...' + Expected: + u'f\xf6\xf6' + Got: + u'f\xc3\xb6\xc3\xb6' + ********************************************************************** + ... + ********************************************************************** + 1 items had failures: + 2 of 4 in test_doctest4.txt + ***Test Failed*** 2 failures. + (2, 4) + >>> doctest.master = None # Reset master. + + >>> doctest.testfile('test_doctest4.txt', encoding='utf-8') + (0, 4) + >>> doctest.master = None # Reset master. """ # old_test1, ... used to live in doctest.py, but cluttered it. Note diff --git a/Lib/test/test_doctest4.txt b/Lib/test/test_doctest4.txt new file mode 100644 index 00000000000..a219d16a6ea --- /dev/null +++ b/Lib/test/test_doctest4.txt @@ -0,0 +1,17 @@ +This is a sample doctest in a text file that contains non-ASCII characters. +This file is encoded using UTF-8. + +In order to get this test to pass, we have to manually specify the +encoding. + + >>> u'föö' + u'f\xf6\xf6' + + >>> u'bąr' + u'b\u0105r' + + >>> 'föö' + 'f\xc3\xb6\xc3\xb6' + + >>> 'bąr' + 'b\xc4\x85r' diff --git a/Misc/NEWS b/Misc/NEWS index 8d1063d5e30..31a5af93027 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -85,6 +85,8 @@ Extension Modules Library ------- +- Patch #1080727: add "encoding" parameter to doctest.DocFileSuite. + - Patch #1281707: speed up gzip.readline. - Patch #1180296: Two new functions were added to the locale module: