cpython/Lib/test/test_importlib/source/test_source_encoding.py

from . import util as source_util

from importlib import _bootstrap
import codecs
import re
import sys
# Because sys.path gets essentially blanked, need to have unicodedata already
# imported for the parser to use.
import unicodedata
import unittest


# Pattern for a PEP 263 style encoding declaration: the word "coding", then
# ":" or "=", optional whitespace, and finally the codec name (captured as
# group 1; word characters, "-" and "." are allowed in the name).
# The tests below use it to sanity-check the coding lines they generate.
CODING_RE = re.compile(r'coding[:=]\s*([-\w.]+)')


class EncodingTest(unittest.TestCase):

    """Check that source files are decoded with the correct encoding.

    PEP 3120 makes UTF-8 the default encoding for source code
    [default encoding].

    PEP 263 specifies how that can change on a per-file basis. Either the first
    or second line can contain the encoding line [encoding first line]
    [encoding second line]. If the file has the BOM marker it is considered
    UTF-8 implicitly [BOM]. If any encoding is specified it must be UTF-8, else
    it is an error [BOM and utf-8][BOM conflict].

    The bracketed tags match the comment placed above each test method.

    """

    # Both the identifier and the string value are non-ASCII, so their byte
    # representation differs between UTF-8 and Latin-1.
    variable = '\u00fc'
    character = '\u00c9'
    source_line = "{0} = '{1}'\n".format(variable, character)
    module_name = '_temp'

    def run_test(self, source):
        """Write `source` (bytes) to the module's file and load it.

        Returns whatever the source loader's load_module() returns (the loaded
        module). Any exception raised while loading propagates to the caller.
        """
        with source_util.create_modules(self.module_name) as mapping:
            path = mapping[self.module_name]
            with open(path, 'wb') as file:
                file.write(source)
            loader = _bootstrap.SourceFileLoader(self.module_name, path)
            return loader.load_module(self.module_name)

    def create_source(self, encoding):
        """Return source bytes: a coding line for `encoding`, then source_line.

        The coding line itself is written as UTF-8 while source_line is
        encoded using `encoding`.
        """
        encoding_line = "# coding={0}".format(encoding)
        # Use an assertion method instead of a bare ``assert`` so the fixture
        # is still validated when the tests run under ``python -O``.
        self.assertRegex(encoding_line, CODING_RE)
        source_lines = [encoding_line.encode('utf-8'),
                        self.source_line.encode(encoding)]
        return b'\n'.join(source_lines)

    def check_module(self, module):
        """Assert that source_line was decoded and executed as intended.

        Loading without an exception is not sufficient: bytes decoded with the
        wrong codec can still form valid source, just with a different
        identifier or string value.
        """
        namespace = vars(module)
        self.assertIn(self.variable, namespace)
        self.assertEqual(namespace[self.variable], self.character)

    def test_non_obvious_encoding(self):
        # Make sure that an encoding that has never been a standard one for
        # Python works.
        encoding_line = "# coding=koi8-r"
        self.assertRegex(encoding_line, CODING_RE)
        source = "{0}\na=42\n".format(encoding_line).encode("koi8-r")
        module = self.run_test(source)
        self.assertEqual(module.a, 42)

    # [default encoding]
    def test_default_encoding(self):
        module = self.run_test(self.source_line.encode('utf-8'))
        self.check_module(module)

    # [encoding first line]
    def test_encoding_on_first_line(self):
        encoding = 'Latin-1'
        source = self.create_source(encoding)
        self.check_module(self.run_test(source))

    # [encoding second line]
    def test_encoding_on_second_line(self):
        # The leading comment line pushes the coding line down to line two.
        source = b"#/usr/bin/python\n" + self.create_source('Latin-1')
        self.check_module(self.run_test(source))

    # [BOM]
    def test_bom(self):
        source = codecs.BOM_UTF8 + self.source_line.encode('utf-8')
        self.check_module(self.run_test(source))

    # [BOM and utf-8]
    def test_bom_and_utf_8(self):
        source = codecs.BOM_UTF8 + self.create_source('utf-8')
        self.check_module(self.run_test(source))

    # [BOM conflict]
    def test_bom_conflict(self):
        # A UTF-8 BOM combined with a non-UTF-8 coding line must be rejected.
        source = codecs.BOM_UTF8 + self.create_source('latin-1')
        with self.assertRaises(SyntaxError):
            self.run_test(source)


class LineEndingTest(unittest.TestCase):

    r"""Source written with any of the three types of line endings
    (\n, \r\n, \r) needs to be readable [cr][crlf][lf]."""

    def run_test(self, line_ending):
        r"""Load a two-statement module whose lines end with `line_ending`.

        `line_ending` is the bytes separator to use (b'\n', b'\r\n' or b'\r').
        Returns the loaded module after checking that both assignments ran.
        """
        module_name = '_temp'
        # The trailing empty element makes the last statement end with the
        # line ending under test as well.
        source_lines = [b"a = 42", b"b = -13", b'']
        source = line_ending.join(source_lines)
        with source_util.create_modules(module_name) as mapping:
            path = mapping[module_name]
            with open(path, 'wb') as file:
                file.write(source)
            loader = _bootstrap.SourceFileLoader(module_name, path)
            module = loader.load_module(module_name)
            # A successful load alone does not show the line ending was
            # honoured; confirm each line executed as its own statement.
            self.assertEqual(module.a, 42)
            self.assertEqual(module.b, -13)
            return module

    # [cr]
    def test_cr(self):
        self.run_test(b'\r')

    # [crlf]
    def test_crlf(self):
        self.run_test(b'\r\n')

    # [lf]
    def test_lf(self):
        self.run_test(b'\n')


def test_main():
    """Run every test case defined in this module via test.support."""
    from test.support import run_unittest
    test_cases = (EncodingTest, LineEndingTest)
    run_unittest(*test_cases)


# Allow the tests to be run by executing this file directly as a script.
if __name__ == '__main__':
    test_main()
Split out support code that is specific to source tests out of importlib.test.support to importlib.test.source.util. 2009-01-31 23:08:31 -04:00			`from . import util as source_util`
Add initial implementation of importlib. See the NOTES files for what is planned for the package. There are no docs yet, but they are coming once the API for the first new function, importlib.import_module() is finalized. 2009-01-17 20:24:28 -04:00
Issue #2377: Make importlib the implementation of __import__(). importlib._bootstrap is now frozen into Python/importlib.h and stored as _frozen_importlib in sys.modules. Py_Initialize() loads the frozen code along with sys and imp and then uses _frozen_importlib._install() to set builtins.__import__() w/ _frozen_importlib.__import__(). 2012-04-14 15:10:13 -03:00			`from importlib import _bootstrap`
Add initial implementation of importlib. See the NOTES files for what is planned for the package. There are no docs yet, but they are coming once the API for the first new function, importlib.import_module() is finalized. 2009-01-17 20:24:28 -04:00			`import codecs`
			`import re`
			`import sys`
			`# Because sys.path gets essentially blanked, need to have unicodedata already`
			`# imported for the parser to use.`
			`import unicodedata`
			`import unittest`


			`CODING_RE = re.compile(r'coding[:=]\s*([-\w.]+)')`


			`class EncodingTest(unittest.TestCase):`

			`"""PEP 3120 makes UTF-8 the default encoding for source code`
			`[default encoding].`

			`PEP 263 specifies how that can change on a per-file basis. Either the first`
			`or second line can contain the encoding line [encoding first line]`
			`encoding second line]. If the file has the BOM marker it is considered UTF-8`
			`implicitly [BOM]. If any encoding is specified it must be UTF-8, else it is`
			`an error [BOM and utf-8][BOM conflict].`

			`"""`

			`variable = '\u00fc'`
			`character = '\u00c9'`
			`source_line = "{0} = '{1}'\n".format(variable, character)`
			`module_name = '_temp'`

			`def run_test(self, source):`
Split out support code that is specific to source tests out of importlib.test.support to importlib.test.source.util. 2009-01-31 23:08:31 -04:00			`with source_util.create_modules(self.module_name) as mapping:`
PEP 3147 2010-04-16 21:19:56 -03:00			`with open(mapping[self.module_name], 'wb') as file:`
Add initial implementation of importlib. See the NOTES files for what is planned for the package. There are no docs yet, but they are coming once the API for the first new function, importlib.import_module() is finalized. 2009-01-17 20:24:28 -04:00			`file.write(source)`
Issue #14605: Expose importlib.abc.FileLoader and importlib.machinery.(FileFinder, SourceFileLoader, _SourcelessFileLoader, ExtensionFileLoader). This exposes all of importlib's mechanisms that will become public on the sys module. 2012-04-22 20:58:33 -03:00			`loader = _bootstrap.SourceFileLoader(self.module_name,`
Issue #2377: Make importlib the implementation of __import__(). importlib._bootstrap is now frozen into Python/importlib.h and stored as _frozen_importlib in sys.modules. Py_Initialize() loads the frozen code along with sys and imp and then uses _frozen_importlib._install() to set builtins.__import__() w/ _frozen_importlib.__import__(). 2012-04-14 15:10:13 -03:00			`mapping[self.module_name])`
Add initial implementation of importlib. See the NOTES files for what is planned for the package. There are no docs yet, but they are coming once the API for the first new function, importlib.import_module() is finalized. 2009-01-17 20:24:28 -04:00			`return loader.load_module(self.module_name)`

			`def create_source(self, encoding):`
			`encoding_line = "# coding={0}".format(encoding)`
			`assert CODING_RE.search(encoding_line)`
			`source_lines = [encoding_line.encode('utf-8')]`
			`source_lines.append(self.source_line.encode(encoding))`
			`return b'\n'.join(source_lines)`

			`def test_non_obvious_encoding(self):`
			`# Make sure that an encoding that has never been a standard one for`
			`# Python works.`
			`encoding_line = "# coding=koi8-r"`
			`assert CODING_RE.search(encoding_line)`
			`source = "{0}\na=42\n".format(encoding_line).encode("koi8-r")`
			`self.run_test(source)`

			`# [default encoding]`
			`def test_default_encoding(self):`
			`self.run_test(self.source_line.encode('utf-8'))`

			`# [encoding first line]`
			`def test_encoding_on_first_line(self):`
			`encoding = 'Latin-1'`
			`source = self.create_source(encoding)`
			`self.run_test(source)`

			`# [encoding second line]`
			`def test_encoding_on_second_line(self):`
			`source = b"#/usr/bin/python\n" + self.create_source('Latin-1')`
			`self.run_test(source)`

			`# [BOM]`
			`def test_bom(self):`
			`self.run_test(codecs.BOM_UTF8 + self.source_line.encode('utf-8'))`

			`# [BOM and utf-8]`
			`def test_bom_and_utf_8(self):`
			`source = codecs.BOM_UTF8 + self.create_source('utf-8')`
			`self.run_test(source)`

			`# [BOM conflict]`
			`def test_bom_conflict(self):`
			`source = codecs.BOM_UTF8 + self.create_source('latin-1')`
Move over to using assertRaises as a context manager for importlib tests. Obviously one shouldn't do whole sale conversions like this, but I was already going through the test code and I was bored at the airport. 2009-08-27 20:49:21 -03:00			`with self.assertRaises(SyntaxError):`
			`self.run_test(source)`
Add initial implementation of importlib. See the NOTES files for what is planned for the package. There are no docs yet, but they are coming once the API for the first new function, importlib.import_module() is finalized. 2009-01-17 20:24:28 -04:00

			`class LineEndingTest(unittest.TestCase):`

			`r"""Source written with the three types of line endings (\n, \r\n, \r)`
			`need to be readable [cr][crlf][lf]."""`

			`def run_test(self, line_ending):`
			`module_name = '_temp'`
			`source_lines = [b"a = 42", b"b = -13", b'']`
			`source = line_ending.join(source_lines)`
Split out support code that is specific to source tests out of importlib.test.support to importlib.test.source.util. 2009-01-31 23:08:31 -04:00			`with source_util.create_modules(module_name) as mapping:`
Add initial implementation of importlib. See the NOTES files for what is planned for the package. There are no docs yet, but they are coming once the API for the first new function, importlib.import_module() is finalized. 2009-01-17 20:24:28 -04:00			`with open(mapping[module_name], 'wb') as file:`
			`file.write(source)`
Issue #14605: Expose importlib.abc.FileLoader and importlib.machinery.(FileFinder, SourceFileLoader, _SourcelessFileLoader, ExtensionFileLoader). This exposes all of importlib's mechanisms that will become public on the sys module. 2012-04-22 20:58:33 -03:00			`loader = _bootstrap.SourceFileLoader(module_name,`
Make importlib.abc.SourceLoader the primary mechanism for importlib. This required moving the class from importlib/abc.py into importlib/_bootstrap.py and jiggering some code to work better with the class. This included changing how the file finder worked to better meet import semantics. This also led to fixing importlib to handle the empty string from sys.path as import currently does (and making me wish we didn't support that instead just required people to insert '.' instead to represent cwd). It also required making the new set_data abstractmethod create any needed subdirectories implicitly thanks to __pycache__ (it was either this or grow the SourceLoader ABC to gain an 'exists' method and either a mkdir method or have set_data with no data arg mean to create a directory). Lastly, as an optimization the file loaders cache the file path where the finder found something to use for loading (this is thanks to having a sourceless loader separate from the source loader to simplify the code and cut out stat calls). Unfortunately test_runpy assumed a loader would always work for a module, even if you changed from underneath it what it was expected to work with. By simply dropping the previous loader in test_runpy so the proper loader can be returned by the finder fixed the failure. At this point importlib deviates from import on two points: 1. The exception raised when trying to import a file is different (import does an explicit file check to print a special message, importlib just says the path cannot be imported as if it was just some module name). 2. the co_filename on a code object is not being set to where bytecode was actually loaded from instead of where the marshalled code object originally came from (a solution for this has already been agreed upon on python-dev but has not been implemented yet; issue8611). 2010-07-03 18:48:25 -03:00			`mapping[module_name])`
Add initial implementation of importlib. See the NOTES files for what is planned for the package. There are no docs yet, but they are coming once the API for the first new function, importlib.import_module() is finalized. 2009-01-17 20:24:28 -04:00			`return loader.load_module(module_name)`

			`# [cr]`
			`def test_cr(self):`
			`self.run_test(b'\r')`

			`# [crlf]`
			`def test_crlf(self):`
			`self.run_test(b'\r\n')`

			`# [lf]`
			`def test_lf(self):`
			`self.run_test(b'\n')`


			`def test_main():`
			`from test.support import run_unittest`
			`run_unittest(EncodingTest, LineEndingTest)`


			`if __name__ == '__main__':`
			`test_main()`