#9411: allow selecting an encoding for configparser files. Also adds a new test config file to test special cases.

This commit is contained in:
Georg Brandl 2010-07-29 12:17:40 +00:00
parent f206d0e393
commit 8dcaa7396f
5 changed files with 136 additions and 16 deletions

View File

@ -286,25 +286,29 @@ RawConfigParser Objects
:const:`True`; otherwise return :const:`False`.
.. method:: RawConfigParser.read(filenames)
.. method:: RawConfigParser.read(filenames, encoding=None)
Attempt to read and parse a list of filenames, returning a list of filenames
which were successfully parsed. If *filenames* is a string,
it is treated as a single filename. If a file named in *filenames* cannot be
opened, that file will be ignored. This is designed so that you can specify a
list of potential configuration file locations (for example, the current
directory, the user's home directory, and some system-wide directory), and all
existing configuration files in the list will be read. If none of the named
files exist, the :class:`ConfigParser` instance will contain an empty dataset.
An application which requires initial values to be loaded from a file should
load the required file or files using :meth:`readfp` before calling :meth:`read`
for any optional files::
which were successfully parsed. If *filenames* is a string, it is treated as
a single filename. If a file named in *filenames* cannot be opened, that
file will be ignored. This is designed so that you can specify a list of
potential configuration file locations (for example, the current directory,
the user's home directory, and some system-wide directory), and all existing
configuration files in the list will be read. If none of the named files
exist, the :class:`ConfigParser` instance will contain an empty dataset. An
application which requires initial values to be loaded from a file should
load the required file or files using :meth:`readfp` before calling
:meth:`read` for any optional files::
import configparser, os
config = configparser.ConfigParser()
config.readfp(open('defaults.cfg'))
config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')])
config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')], encoding='cp1250')
.. versionadded:: 3.2
The *encoding* parameter. Previously, all files were read using the
default encoding for :func:`open`.
.. method:: RawConfigParser.readfp(fp, filename=None)

View File

@ -61,7 +61,7 @@ ConfigParser -- responsible for parsing a list of
options(section)
Return list of configuration options for the named section.
read(filenames)
read(filenames, encoding=None)
Read and parse the list of named configuration files, given by
name. A single filename is also allowed. Non-existing files
are ignored. Return list of successfully read files.
@ -369,7 +369,7 @@ class RawConfigParser:
del opts['__name__']
return list(opts.keys())
def read(self, filenames):
def read(self, filenames, encoding=None):
"""Read and parse a filename or a list of filenames.
Files that cannot be opened are silently ignored; this is
@ -386,7 +386,7 @@ class RawConfigParser:
read_ok = []
for filename in filenames:
try:
fp = open(filename)
fp = open(filename, encoding=encoding)
except IOError:
continue
self._read(fp, filename)

69
Lib/test/cfgparser.3 Normal file
View File

@ -0,0 +1,69 @@
# INI with as many tricky parts as possible
# Most of them could not be used before 3.2
# This will be parsed with the following options
# delimiters = {'='}
# comment_prefixes = {'#'}
# allow_no_value = True
[DEFAULT]
go = %(interpolate)s
[strange]
values = that are indented # and end with hash comments
other = that do continue
in # and still have
other # comments mixed
lines # with the values
[corruption]
value = that is
actually still here
and holds all these weird newlines
# but not for the lines that are comments
nor the indentation
another value = # empty string
yet another # None!
[yeah, sections can be indented as well]
and that does not mean = anything
are they subsections = False
if you want subsections = use XML
lets use some Unicode = 片仮名
[another one!]
even if values are indented like this = seriously
yes, this still applies to = section "another one!"
this too = are there people with configurations broken as this?
beware, this is going to be a continuation
of the value for
key "this too"
even if it has a = character
this is still the continuation
your editor probably highlights it wrong
but that's life
# let's set this value so there is no error
# when getting all items for this section:
interpolate = anything will do
[no values here]
# but there's this `go` in DEFAULT
[tricky interpolation]
interpolate = do this
lets = %(go)s
[more interpolation]
interpolate = go shopping
lets = %(go)s

View File

@ -533,7 +533,7 @@ class RawConfigParserTestSambaConf(BasicTestCase):
smbconf = support.findfile("cfgparser.2")
# check when we pass a mix of readable and non-readable files:
cf = self.newconfig()
parsed_files = cf.read([smbconf, "nonexistent-file"])
parsed_files = cf.read([smbconf, "nonexistent-file"], encoding='utf-8')
self.assertEqual(parsed_files, [smbconf])
sections = ['global', 'homes', 'printers',
'print$', 'pdf-generator', 'tmp', 'Agustin']
@ -600,6 +600,46 @@ class SafeConfigParserTestCaseNonStandardDelimiters(SafeConfigParserTestCase):
class SafeConfigParserTestCaseNoValue(SafeConfigParserTestCase):
allow_no_value = True
class SafeConfigParserTestCaseTrickyFile(CfgParserTestCaseClass):
    """Parse ``cfgparser.3``, an INI file packed with syntactic corner cases.

    The class attributes below mirror the parser options that the header
    comment of ``cfgparser.3`` says the file is meant to be parsed with.
    """

    config_class = configparser.SafeConfigParser
    delimiters = {'='}
    comment_prefixes = {'#'}
    allow_no_value = True

    def test_cfgparser_dot_3(self):
        """Read the tricky file as UTF-8 and check what the parser made of it."""
        tricky = support.findfile("cfgparser.3")
        cf = self.newconfig()
        # read() returns the list of files it parsed; exactly one is expected.
        self.assertEqual(len(cf.read(tricky, encoding='utf-8')), 1)
        self.assertEqual(cf.sections(), ['strange',
                                         'corruption',
                                         'yeah, sections can be '
                                         'indented as well',
                                         'another one!',
                                         'no values here',
                                         'tricky interpolation',
                                         'more interpolation'])
        # NOTE(review): this assertion was already disabled in the original
        # change; the reason is not recorded here -- confirm before enabling.
        #self.assertEqual(cf.getint('DEFAULT', 'go', vars={'interpolate': '-1'}),
        #                 -1)
        # Multi-line values are counted by the number of lines they span.
        self.assertEqual(len(cf.get('strange', 'other').split('\n')), 4)
        self.assertEqual(len(cf.get('corruption', 'value').split('\n')), 10)
        longname = 'yeah, sections can be indented as well'
        self.assertFalse(cf.getboolean(longname, 'are they subsections'))
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(cf.get(longname, 'lets use some Unicode'),
                         '片仮名')
        self.assertEqual(len(cf.items('another one!')), 5)  # 4 in section and
                                                            # `go` from DEFAULT
        # `go` from DEFAULT is "%(interpolate)s", and this section does not
        # define `interpolate`.
        with self.assertRaises(configparser.InterpolationMissingOptionError):
            cf.items('no values here')
        self.assertEqual(cf.get('tricky interpolation', 'lets'), 'do this')
        self.assertEqual(cf.get('tricky interpolation', 'lets'),
                         cf.get('tricky interpolation', 'go'))
        self.assertEqual(cf.get('more interpolation', 'lets'), 'go shopping')

    def test_unicode_failure(self):
        """Reading the file as ASCII must fail: it contains non-ASCII text."""
        tricky = support.findfile("cfgparser.3")
        cf = self.newconfig()
        with self.assertRaises(UnicodeDecodeError):
            cf.read(tricky, encoding='ascii')
class SortedTestCase(RawConfigParserTestCase):
dict_type = SortedDict
@ -635,10 +675,13 @@ class CompatibleTestCase(CfgParserTestCaseClass):
foo: bar # not a comment!
# but this is a comment
; another comment
quirk: this;is not a comment
; a space must precede a comment character
""")
cf = self.fromstring(config_string)
self.assertEqual(cf.get('Commented Bar', 'foo'), 'bar # not a comment!')
self.assertEqual(cf.get('Commented Bar', 'baz'), 'qwe')
self.assertEqual(cf.get('Commented Bar', 'quirk'), 'this;is not a comment')
def test_main():
@ -652,6 +695,7 @@ def test_main():
SafeConfigParserTestCase,
SafeConfigParserTestCaseNonStandardDelimiters,
SafeConfigParserTestCaseNoValue,
SafeConfigParserTestCaseTrickyFile,
SortedTestCase,
CompatibleTestCase,
)

View File

@ -475,6 +475,9 @@ C-API
Library
-------
- Issue #9411: Allow specifying an encoding for config files in the
configparser module.
- Issue #1682942: Improvements to configparser: support alternate
delimiters, alternate comment prefixes and empty lines in values.