mirror of https://github.com/python/cpython
#4487: have Charset check with codecs for possible aliases.
Previously, passing email a codec alias such as 'utf8' as a charset name gave unexpected results: email would accept the name but would *not* use the 'utf-8' codec for it, even though Python itself recognises 'utf8' as an alias for utf-8. Now Charset checks with codecs for aliases as well as its own internal table. Issue 8898 has been opened to change this further in py3k so that all aliasing is routed through the codecs module.
This commit is contained in:
parent
eba67c0eac
commit
e7e505ba6e
|
@ -9,6 +9,7 @@ __all__ = [
|
|||
'add_codec',
|
||||
]
|
||||
|
||||
import codecs
|
||||
import email.base64mime
|
||||
import email.quoprimime
|
||||
|
||||
|
@ -209,7 +210,12 @@ class Charset:
|
|||
except UnicodeError:
|
||||
raise errors.CharsetError(input_charset)
|
||||
input_charset = input_charset.lower()
|
||||
# Set the input charset after filtering through the aliases
|
||||
# Set the input charset after filtering through the aliases and/or codecs
|
||||
if not (input_charset in ALIASES or input_charset in CHARSETS):
|
||||
try:
|
||||
input_charset = codecs.lookup(input_charset).name
|
||||
except LookupError:
|
||||
pass
|
||||
self.input_charset = ALIASES.get(input_charset, input_charset)
|
||||
# We can try to guess which encoding and conversion to use by the
|
||||
# charset_map dictionary. Try that first, but let the user override
|
||||
|
|
|
@ -2868,6 +2868,9 @@ class TestCharset(unittest.TestCase):
|
|||
self.assertEqual(str(charset), 'us-ascii')
|
||||
self.assertRaises(Errors.CharsetError, Charset, 'asc\xffii')
|
||||
|
||||
def test_codecs_aliases_accepted(self):
|
||||
charset = Charset('utf8')
|
||||
self.assertEqual(str(charset), 'utf-8')
|
||||
|
||||
|
||||
# Test multilingual MIME headers.
|
||||
|
|
Loading…
Reference in New Issue