Fix distutils’ check and register Unicode handling (#13114).

The check command was fixed by Kirill Kuzminykh.

The register command was using StringIO.getvalue, which uses “''.join”
and thus coerces to str using the default encoding (ASCII), so I changed
the code to use one extra intermediary list and correctly encode to
UTF-8.
This commit is contained in:
Éric Araujo 2011-10-09 07:11:19 +02:00
parent ece7079b54
commit 017e535bde
6 changed files with 56 additions and 13 deletions

View File

@ -5,6 +5,7 @@ Implements the Distutils 'check' command.
__revision__ = "$Id$" __revision__ = "$Id$"
from distutils.core import Command from distutils.core import Command
from distutils.dist import PKG_INFO_ENCODING
from distutils.errors import DistutilsSetupError from distutils.errors import DistutilsSetupError
try: try:
@ -108,6 +109,8 @@ class check(Command):
def check_restructuredtext(self): def check_restructuredtext(self):
"""Checks if the long string fields are reST-compliant.""" """Checks if the long string fields are reST-compliant."""
data = self.distribution.get_long_description() data = self.distribution.get_long_description()
if not isinstance(data, unicode):
data = data.decode(PKG_INFO_ENCODING)
for warning in self._check_rst_data(data): for warning in self._check_rst_data(data):
line = warning[-1].get('line') line = warning[-1].get('line')
if line is None: if line is None:

View File

@ -10,7 +10,6 @@ __revision__ = "$Id$"
import urllib2 import urllib2
import getpass import getpass
import urlparse import urlparse
import StringIO
from warnings import warn from warnings import warn
from distutils.core import PyPIRCCommand from distutils.core import PyPIRCCommand
@ -260,21 +259,30 @@ Your selection [default 1]: ''', log.INFO)
boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
sep_boundary = '\n--' + boundary sep_boundary = '\n--' + boundary
end_boundary = sep_boundary + '--' end_boundary = sep_boundary + '--'
body = StringIO.StringIO() chunks = []
for key, value in data.items(): for key, value in data.items():
# handle multiple entries for the same name # handle multiple entries for the same name
if type(value) not in (type([]), type( () )): if type(value) not in (type([]), type( () )):
value = [value] value = [value]
for value in value: for value in value:
body.write(sep_boundary) chunks.append(sep_boundary)
body.write('\nContent-Disposition: form-data; name="%s"'%key) chunks.append('\nContent-Disposition: form-data; name="%s"'%key)
body.write("\n\n") chunks.append("\n\n")
body.write(value) chunks.append(value)
if value and value[-1] == '\r': if value and value[-1] == '\r':
body.write('\n') # write an extra newline (lurve Macs) chunks.append('\n') # write an extra newline (lurve Macs)
body.write(end_boundary) chunks.append(end_boundary)
body.write("\n") chunks.append("\n")
body = body.getvalue()
# chunks may be bytes (str) or unicode objects that we need to encode
body = []
for chunk in chunks:
if isinstance(chunk, unicode):
body.append(chunk.encode('utf-8'))
else:
body.append(chunk)
body = ''.join(body)
# build the Request # build the Request
headers = { headers = {

View File

@ -1,3 +1,4 @@
# -*- encoding: utf8 -*-
"""Tests for distutils.command.check.""" """Tests for distutils.command.check."""
import unittest import unittest
from test.test_support import run_unittest from test.test_support import run_unittest
@ -46,6 +47,15 @@ class CheckTestCase(support.LoggingSilencer,
cmd = self._run(metadata, strict=1) cmd = self._run(metadata, strict=1)
self.assertEqual(cmd._warnings, 0) self.assertEqual(cmd._warnings, 0)
# now a test with Unicode entries
metadata = {'url': u'xxx', 'author': u'\u00c9ric',
'author_email': u'xxx', u'name': 'xxx',
'version': u'xxx',
'description': u'Something about esszet \u00df',
'long_description': u'More things about esszet \u00df'}
cmd = self._run(metadata)
self.assertEqual(cmd._warnings, 0)
def test_check_document(self): def test_check_document(self):
if not HAS_DOCUTILS: # won't test without docutils if not HAS_DOCUTILS: # won't test without docutils
return return
@ -80,8 +90,8 @@ class CheckTestCase(support.LoggingSilencer,
self.assertRaises(DistutilsSetupError, self._run, metadata, self.assertRaises(DistutilsSetupError, self._run, metadata,
**{'strict': 1, 'restructuredtext': 1}) **{'strict': 1, 'restructuredtext': 1})
# and non-broken rest # and non-broken rest, including a non-ASCII character to test #13114
metadata['long_description'] = 'title\n=====\n\ntest' metadata['long_description'] = u'title\n=====\n\ntest \u00df'
cmd = self._run(metadata, strict=1, restructuredtext=1) cmd = self._run(metadata, strict=1, restructuredtext=1)
self.assertEqual(cmd._warnings, 0) self.assertEqual(cmd._warnings, 0)

View File

@ -1,5 +1,5 @@
"""Tests for distutils.command.register."""
# -*- encoding: utf8 -*- # -*- encoding: utf8 -*-
"""Tests for distutils.command.register."""
import sys import sys
import os import os
import unittest import unittest
@ -246,6 +246,24 @@ class RegisterTestCase(PyPIRCCommandTestCase):
finally: finally:
del register_module.raw_input del register_module.raw_input
# and finally a Unicode test (bug #13114)
metadata = {'url': u'xxx', 'author': u'\u00c9ric',
'author_email': u'xxx', u'name': 'xxx',
'version': u'xxx',
'description': u'Something about esszet \u00df',
'long_description': u'More things about esszet \u00df'}
cmd = self._get_cmd(metadata)
cmd.ensure_finalized()
cmd.strict = 1
inputs = RawInputs('1', 'tarek', 'y')
register_module.raw_input = inputs.__call__
# let's run the command
try:
cmd.run()
finally:
del register_module.raw_input
def test_check_metadata_deprecated(self): def test_check_metadata_deprecated(self):
# makes sure check_metadata is deprecated # makes sure check_metadata is deprecated
cmd = self._get_cmd() cmd = self._get_cmd()

View File

@ -469,6 +469,7 @@ Ivan Krstić
Andrew Kuchling Andrew Kuchling
Ralf W. Grosse-Kunstleve Ralf W. Grosse-Kunstleve
Vladimir Kushnir Vladimir Kushnir
Kirill Kuzminykh (Кирилл Кузьминых)
Ross Lagerwall Ross Lagerwall
Cameron Laird Cameron Laird
Łukasz Langa Łukasz Langa

View File

@ -50,6 +50,9 @@ Core and Builtins
Library Library
------- -------
- Issue #13114: Fix the distutils commands check and register when the
long description is a Unicode string with non-ASCII characters.
- Issue #7367: Fix pkgutil.walk_paths to skip directories whose - Issue #7367: Fix pkgutil.walk_paths to skip directories whose
contents cannot be read. contents cannot be read.