Issue #11159: SAX parser now supports unicode file names.
This commit is contained in:
parent
6e7da15279
commit
8673ab97cc
|
@ -14,6 +14,8 @@ from xml.sax.expatreader import create_parser
|
|||
from xml.sax.handler import feature_namespaces
|
||||
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
|
||||
from cStringIO import StringIO
|
||||
import shutil
|
||||
import test.test_support as support
|
||||
from test.test_support import findfile, run_unittest
|
||||
import unittest
|
||||
|
||||
|
@ -384,6 +386,22 @@ class ExpatReaderTest(XmlTestBase):
|
|||
|
||||
self.assertEqual(result.getvalue(), xml_test_out)
|
||||
|
||||
@unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
                     'Requires unicode filenames support')
def test_expat_file_unicode(self):
    # Feed the parser an open file object for a copy of the test
    # document stored under the unicode test file name, and check that
    # the regenerated XML matches the expected output.
    fname = support.TESTFN_UNICODE
    shutil.copyfile(TEST_XMLFILE, fname)
    self.addCleanup(support.unlink, fname)

    parser = create_parser()
    result = StringIO()
    xmlgen = XMLGenerator(result)

    parser.setContentHandler(xmlgen)
    # Close the file deterministically so the handle is not left open
    # when the registered cleanup unlinks the file.
    with open(fname) as f:
        parser.parse(f)

    self.assertEqual(result.getvalue(), xml_test_out)
|
||||
|
||||
# ===== DTDHandler support
|
||||
|
||||
class TestDTDHandler:
|
||||
|
@ -523,6 +541,22 @@ class ExpatReaderTest(XmlTestBase):
|
|||
|
||||
self.assertEqual(result.getvalue(), xml_test_out)
|
||||
|
||||
@unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
                     'Requires unicode filenames support')
def test_expat_inpsource_sysid_unicode(self):
    # Parse through an InputSource whose system id is the unicode test
    # file name; the regenerated XML must match the expected output.
    unicode_path = support.TESTFN_UNICODE
    shutil.copyfile(TEST_XMLFILE, unicode_path)
    self.addCleanup(support.unlink, unicode_path)

    output = StringIO()
    reader = create_parser()
    reader.setContentHandler(XMLGenerator(output))

    source = InputSource(unicode_path)
    reader.parse(source)

    self.assertEqual(output.getvalue(), xml_test_out)
|
||||
|
||||
def test_expat_inpsource_stream(self):
|
||||
parser = create_parser()
|
||||
result = StringIO()
|
||||
|
@ -596,6 +630,22 @@ class ExpatReaderTest(XmlTestBase):
|
|||
self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
|
||||
self.assertEqual(parser.getPublicId(), None)
|
||||
|
||||
@unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
                     'Requires unicode filenames support')
def test_expat_locator_withinfo_unicode(self):
    # After parsing by unicode file name, the parser must report that
    # same name as its system id and no public id.
    unicode_path = support.TESTFN_UNICODE
    shutil.copyfile(TEST_XMLFILE, unicode_path)
    self.addCleanup(support.unlink, unicode_path)

    reader = create_parser()
    reader.setContentHandler(XMLGenerator(StringIO()))
    reader.parse(unicode_path)

    self.assertEqual(reader.getSystemId(), unicode_path)
    self.assertEqual(reader.getPublicId(), None)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
#
|
||||
|
|
|
@ -108,7 +108,10 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
|
|||
|
||||
def prepareParser(self, source):
    # Hand the source's system id to the underlying parser as its base.
    # Nothing is set when the source has no system id.
    base = source.getSystemId()
    if base is not None:
        # Unicode system ids are encoded as UTF-8 before SetBase is
        # called; any other value is passed through unchanged.
        if isinstance(base, unicode):
            base = base.encode('utf-8')
        self._parser.SetBase(base)
|
||||
|
||||
# Redefined setContentHandler to allow changing handlers during parsing
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ convenience of application and driver writers.
|
|||
"""
|
||||
|
||||
import os, urlparse, urllib, types
|
||||
import sys
|
||||
import handler
|
||||
import xmlreader
|
||||
|
||||
|
@ -293,14 +294,31 @@ def prepare_input_source(source, base = ""):
|
|||
source.setSystemId(f.name)
|
||||
|
||||
if source.getByteStream() is None:
|
||||
sysid = source.getSystemId()
|
||||
basehead = os.path.dirname(os.path.normpath(base))
|
||||
sysidfilename = os.path.join(basehead, sysid)
|
||||
if os.path.isfile(sysidfilename):
|
||||
try:
|
||||
sysid = source.getSystemId()
|
||||
basehead = os.path.dirname(os.path.normpath(base))
|
||||
encoding = sys.getfilesystemencoding()
|
||||
if isinstance(sysid, unicode):
|
||||
if not isinstance(basehead, unicode):
|
||||
try:
|
||||
basehead = basehead.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
sysid = sysid.encode(encoding)
|
||||
else:
|
||||
if isinstance(basehead, unicode):
|
||||
try:
|
||||
sysid = sysid.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
basehead = basehead.encode(encoding)
|
||||
sysidfilename = os.path.join(basehead, sysid)
|
||||
isfile = os.path.isfile(sysidfilename)
|
||||
except UnicodeError:
|
||||
isfile = False
|
||||
if isfile:
|
||||
source.setSystemId(sysidfilename)
|
||||
f = open(sysidfilename, "rb")
|
||||
else:
|
||||
source.setSystemId(urlparse.urljoin(base, sysid))
|
||||
source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
|
||||
f = urllib.urlopen(source.getSystemId())
|
||||
|
||||
source.setByteStream(f)
|
||||
|
|
Loading…
Reference in New Issue