Issue #11159: SAX parser now supports unicode file names.

This commit is contained in:
Serhiy Storchaka 2013-02-02 10:28:30 +02:00
parent 6e7da15279
commit 8673ab97cc
4 changed files with 79 additions and 6 deletions

View File

@ -14,6 +14,8 @@ from xml.sax.expatreader import create_parser
from xml.sax.handler import feature_namespaces
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from cStringIO import StringIO
import shutil
import test.test_support as support
from test.test_support import findfile, run_unittest
import unittest
@ -384,6 +386,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), xml_test_out)
@unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
                     'Requires unicode filenames support')
def test_expat_file_unicode(self):
    # Parsing an open file object whose name is a unicode file name must
    # produce the same output as parsing the reference XML file.
    fname = support.TESTFN_UNICODE
    shutil.copyfile(TEST_XMLFILE, fname)
    self.addCleanup(support.unlink, fname)

    parser = create_parser()
    result = StringIO()
    xmlgen = XMLGenerator(result)
    parser.setContentHandler(xmlgen)

    # Close the file deterministically once parsing is done so that no
    # handle is still open when the unlink cleanup registered above runs.
    with open(fname) as f:
        parser.parse(f)

    self.assertEqual(result.getvalue(), xml_test_out)
# ===== DTDHandler support
class TestDTDHandler:
@ -523,6 +541,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), xml_test_out)
@unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
                     'Requires unicode filenames support')
def test_expat_inpsource_sysid_unicode(self):
    # An InputSource whose system id is a unicode file name must be
    # parsed to the same output as the reference XML file.
    unicode_path = support.TESTFN_UNICODE
    shutil.copyfile(TEST_XMLFILE, unicode_path)
    self.addCleanup(support.unlink, unicode_path)

    reader = create_parser()
    output = StringIO()
    reader.setContentHandler(XMLGenerator(output))

    input_source = InputSource(unicode_path)
    reader.parse(input_source)

    self.assertEqual(output.getvalue(), xml_test_out)
def test_expat_inpsource_stream(self):
parser = create_parser()
result = StringIO()
@ -596,6 +630,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
self.assertEqual(parser.getPublicId(), None)
@unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
                     'Requires unicode filenames support')
def test_expat_locator_withinfo_unicode(self):
    # After parsing a unicode file name, the parser (acting as locator)
    # must report that name as the system id and no public id.
    unicode_path = support.TESTFN_UNICODE
    shutil.copyfile(TEST_XMLFILE, unicode_path)
    self.addCleanup(support.unlink, unicode_path)

    output = StringIO()
    handler = XMLGenerator(output)
    reader = create_parser()
    reader.setContentHandler(handler)
    reader.parse(unicode_path)

    self.assertEqual(reader.getSystemId(), unicode_path)
    self.assertEqual(reader.getPublicId(), None)
# ===========================================================================
#

View File

@ -108,7 +108,10 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
def prepareParser(self, source):
    """Set the underlying parser's base from the source's system id.

    Does nothing when the source has no system id.  A unicode system id
    is encoded as UTF-8 before being handed to ``SetBase``; any other
    value is passed through unchanged.
    """
    base = source.getSystemId()
    if base is None:
        return
    # Encode first and call SetBase exactly once, so the raw unicode id
    # is never passed to the parser.
    # NOTE(review): presumably SetBase expects a byte string -- confirm
    # against the pyexpat documentation.
    if isinstance(base, unicode):
        base = base.encode('utf-8')
    self._parser.SetBase(base)
# Redefined setContentHandler to allow changing handlers during parsing

View File

@ -4,6 +4,7 @@ convenience of application and driver writers.
"""
import os, urlparse, urllib, types
import sys
import handler
import xmlreader
@ -293,14 +294,31 @@ def prepare_input_source(source, base = ""):
source.setSystemId(f.name)
if source.getByteStream() is None:
try:
sysid = source.getSystemId()
basehead = os.path.dirname(os.path.normpath(base))
encoding = sys.getfilesystemencoding()
if isinstance(sysid, unicode):
if not isinstance(basehead, unicode):
try:
basehead = basehead.decode(encoding)
except UnicodeDecodeError:
sysid = sysid.encode(encoding)
else:
if isinstance(basehead, unicode):
try:
sysid = sysid.decode(encoding)
except UnicodeDecodeError:
basehead = basehead.encode(encoding)
sysidfilename = os.path.join(basehead, sysid)
if os.path.isfile(sysidfilename):
isfile = os.path.isfile(sysidfilename)
except UnicodeError:
isfile = False
if isfile:
source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb")
else:
source.setSystemId(urlparse.urljoin(base, sysid))
source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
f = urllib.urlopen(source.getSystemId())
source.setByteStream(f)

View File

@ -202,6 +202,8 @@ Core and Builtins
Library
-------
- Issue #11159: SAX parser now supports unicode file names.
- Issue #6972: The zipfile module no longer overwrites files outside of
its destination path when extracting malicious zip files.