#9411: allow selecting an encoding for configparser files. Also adds a new test config file to test special cases.

This commit is contained in:
Georg Brandl 2010-07-29 12:17:40 +00:00
parent f206d0e393
commit 8dcaa7396f
5 changed files with 136 additions and 16 deletions

View File

@ -286,25 +286,29 @@ RawConfigParser Objects
:const:`True`; otherwise return :const:`False`. :const:`True`; otherwise return :const:`False`.
.. method:: RawConfigParser.read(filenames) .. method:: RawConfigParser.read(filenames, encoding=None)
Attempt to read and parse a list of filenames, returning a list of filenames Attempt to read and parse a list of filenames, returning a list of filenames
which were successfully parsed. If *filenames* is a string, which were successfully parsed. If *filenames* is a string, it is treated as
it is treated as a single filename. If a file named in *filenames* cannot be a single filename. If a file named in *filenames* cannot be opened, that
opened, that file will be ignored. This is designed so that you can specify a file will be ignored. This is designed so that you can specify a list of
list of potential configuration file locations (for example, the current potential configuration file locations (for example, the current directory,
directory, the user's home directory, and some system-wide directory), and all the user's home directory, and some system-wide directory), and all existing
existing configuration files in the list will be read. If none of the named configuration files in the list will be read. If none of the named files
files exist, the :class:`ConfigParser` instance will contain an empty dataset. exist, the :class:`ConfigParser` instance will contain an empty dataset. An
An application which requires initial values to be loaded from a file should application which requires initial values to be loaded from a file should
load the required file or files using :meth:`readfp` before calling :meth:`read` load the required file or files using :meth:`readfp` before calling
for any optional files:: :meth:`read` for any optional files::
import configparser, os import configparser, os
config = configparser.ConfigParser() config = configparser.ConfigParser()
config.readfp(open('defaults.cfg')) config.readfp(open('defaults.cfg'))
config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')]) config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')], encoding='cp1250')
.. versionadded:: 3.2
The *encoding* parameter. Previously, all files were read using the
default encoding for :func:`open`.
.. method:: RawConfigParser.readfp(fp, filename=None) .. method:: RawConfigParser.readfp(fp, filename=None)

View File

@ -61,7 +61,7 @@ ConfigParser -- responsible for parsing a list of
options(section) options(section)
Return list of configuration options for the named section. Return list of configuration options for the named section.
read(filenames) read(filenames, encoding=None)
Read and parse the list of named configuration files, given by Read and parse the list of named configuration files, given by
name. A single filename is also allowed. Non-existing files name. A single filename is also allowed. Non-existing files
are ignored. Return list of successfully read files. are ignored. Return list of successfully read files.
@ -369,7 +369,7 @@ class RawConfigParser:
del opts['__name__'] del opts['__name__']
return list(opts.keys()) return list(opts.keys())
def read(self, filenames): def read(self, filenames, encoding=None):
"""Read and parse a filename or a list of filenames. """Read and parse a filename or a list of filenames.
Files that cannot be opened are silently ignored; this is Files that cannot be opened are silently ignored; this is
@ -386,7 +386,7 @@ class RawConfigParser:
read_ok = [] read_ok = []
for filename in filenames: for filename in filenames:
try: try:
fp = open(filename) fp = open(filename, encoding=encoding)
except IOError: except IOError:
continue continue
self._read(fp, filename) self._read(fp, filename)

69
Lib/test/cfgparser.3 Normal file
View File

@ -0,0 +1,69 @@
# INI with as many tricky parts as possible
# Most of them could not be used before 3.2
# This will be parsed with the following options
# delimiters = {'='}
# comment_prefixes = {'#'}
# allow_no_value = True
[DEFAULT]
go = %(interpolate)s
[strange]
values = that are indented # and end with hash comments
other = that do continue
in # and still have
other # comments mixed
lines # with the values
[corruption]
value = that is
actually still here
and holds all these weird newlines
# but not for the lines that are comments
nor the indentation
another value = # empty string
yet another # None!
[yeah, sections can be indented as well]
and that does not mean = anything
are they subsections = False
if you want subsections = use XML
lets use some Unicode = 片仮名
[another one!]
even if values are indented like this = seriously
yes, this still applies to = section "another one!"
this too = are there people with configurations broken as this?
beware, this is going to be a continuation
of the value for
key "this too"
even if it has a = character
this is still the continuation
your editor probably highlights it wrong
but that's life
# let's set this value so there is no error
# when getting all items for this section:
interpolate = anything will do
[no values here]
# but there's this `go` in DEFAULT
[tricky interpolation]
interpolate = do this
lets = %(go)s
[more interpolation]
interpolate = go shopping
lets = %(go)s

View File

@ -533,7 +533,7 @@ class RawConfigParserTestSambaConf(BasicTestCase):
smbconf = support.findfile("cfgparser.2") smbconf = support.findfile("cfgparser.2")
# check when we pass a mix of readable and non-readable files: # check when we pass a mix of readable and non-readable files:
cf = self.newconfig() cf = self.newconfig()
parsed_files = cf.read([smbconf, "nonexistent-file"]) parsed_files = cf.read([smbconf, "nonexistent-file"], encoding='utf-8')
self.assertEqual(parsed_files, [smbconf]) self.assertEqual(parsed_files, [smbconf])
sections = ['global', 'homes', 'printers', sections = ['global', 'homes', 'printers',
'print$', 'pdf-generator', 'tmp', 'Agustin'] 'print$', 'pdf-generator', 'tmp', 'Agustin']
@ -600,6 +600,46 @@ class SafeConfigParserTestCaseNonStandardDelimiters(SafeConfigParserTestCase):
class SafeConfigParserTestCaseNoValue(SafeConfigParserTestCase): class SafeConfigParserTestCaseNoValue(SafeConfigParserTestCase):
allow_no_value = True allow_no_value = True
class SafeConfigParserTestCaseTrickyFile(CfgParserTestCaseClass):
    """Exercise SafeConfigParser against the awkward ``cfgparser.3`` sample.

    The sample file states that it is meant to be parsed with ``=`` as the
    only delimiter, ``#`` as the only comment prefix and valueless options
    allowed; the class attributes below mirror those settings.
    NOTE(review): presumably ``newconfig()`` on the base class feeds these
    attributes to ``config_class`` -- the base class is not visible here.
    """
    config_class = configparser.SafeConfigParser
    delimiters = {'='}
    comment_prefixes = {'#'}
    allow_no_value = True

    def test_cfgparser_dot_3(self):
        """Read the sample as UTF-8 and check sections, values and interpolation."""
        tricky = support.findfile("cfgparser.3")
        cf = self.newconfig()
        # read() returns the list of files it parsed; exactly one is expected.
        self.assertEqual(len(cf.read(tricky, encoding='utf-8')), 1)
        self.assertEqual(cf.sections(), ['strange',
                                         'corruption',
                                         'yeah, sections can be '
                                         'indented as well',
                                         'another one!',
                                         'no values here',
                                         'tricky interpolation',
                                         'more interpolation'])
        # NOTE(review): the check below was already disabled in the original;
        # the reason is not visible here.
        #self.assertEqual(cf.getint('DEFAULT', 'go', vars={'interpolate': '-1'}),
        #                 -1)
        # Multi-line values are counted by the number of newline-separated parts.
        self.assertEqual(len(cf.get('strange', 'other').split('\n')), 4)
        self.assertEqual(len(cf.get('corruption', 'value').split('\n')), 10)
        longname = 'yeah, sections can be indented as well'
        self.assertFalse(cf.getboolean(longname, 'are they subsections'))
        # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
        self.assertEqual(cf.get(longname, 'lets use some Unicode'),
                         '片仮名')
        self.assertEqual(len(cf.items('another one!')), 5) # 4 in section and
                                                           # `go` from DEFAULT
        # DEFAULT's `go` refers to %(interpolate)s, which this section lacks.
        with self.assertRaises(configparser.InterpolationMissingOptionError):
            cf.items('no values here')
        self.assertEqual(cf.get('tricky interpolation', 'lets'), 'do this')
        self.assertEqual(cf.get('tricky interpolation', 'lets'),
                         cf.get('tricky interpolation', 'go'))
        self.assertEqual(cf.get('more interpolation', 'lets'), 'go shopping')

    def test_unicode_failure(self):
        """Reading the sample (which contains non-ASCII text) as ASCII must fail."""
        tricky = support.findfile("cfgparser.3")
        cf = self.newconfig()
        with self.assertRaises(UnicodeDecodeError):
            cf.read(tricky, encoding='ascii')
class SortedTestCase(RawConfigParserTestCase): class SortedTestCase(RawConfigParserTestCase):
dict_type = SortedDict dict_type = SortedDict
@ -635,10 +675,13 @@ class CompatibleTestCase(CfgParserTestCaseClass):
foo: bar # not a comment! foo: bar # not a comment!
# but this is a comment # but this is a comment
; another comment ; another comment
quirk: this;is not a comment
; a space must precede a comment character
""") """)
cf = self.fromstring(config_string) cf = self.fromstring(config_string)
self.assertEqual(cf.get('Commented Bar', 'foo'), 'bar # not a comment!') self.assertEqual(cf.get('Commented Bar', 'foo'), 'bar # not a comment!')
self.assertEqual(cf.get('Commented Bar', 'baz'), 'qwe') self.assertEqual(cf.get('Commented Bar', 'baz'), 'qwe')
self.assertEqual(cf.get('Commented Bar', 'quirk'), 'this;is not a comment')
def test_main(): def test_main():
@ -652,6 +695,7 @@ def test_main():
SafeConfigParserTestCase, SafeConfigParserTestCase,
SafeConfigParserTestCaseNonStandardDelimiters, SafeConfigParserTestCaseNonStandardDelimiters,
SafeConfigParserTestCaseNoValue, SafeConfigParserTestCaseNoValue,
SafeConfigParserTestCaseTrickyFile,
SortedTestCase, SortedTestCase,
CompatibleTestCase, CompatibleTestCase,
) )

View File

@ -475,6 +475,9 @@ C-API
Library Library
------- -------
- Issue #9411: Allow specifying an encoding for config files in the
configparser module.
- Issue #1682942: Improvements to configparser: support alternate - Issue #1682942: Improvements to configparser: support alternate
delimiters, alternate comment prefixes and empty lines in values. delimiters, alternate comment prefixes and empty lines in values.