Expat could crash when given malformed input because its tokenizing step never stopped.

Thanks to Ivan Krstić for the patch.
This commit is contained in:
Brett Cannon 2009-08-13 19:27:12 +00:00
parent c4ad0345cf
commit 764465f315
4 changed files with 26 additions and 3 deletions

View File

@ -559,6 +559,24 @@ class ChardataBufferTest(unittest.TestCase):
parser.Parse(xml2, 1)
self.assertEquals(self.n, 4)
class MalformedInputText(unittest.TestCase):
def test1(self):
xml = "\0\r\n"
parser = expat.ParserCreate()
try:
parser.Parse(xml, True)
self.fail()
except expat.ExpatError as e:
self.assertEquals(str(e), 'no element found: line 2, column 1')
def test2(self):
xml = "<?xml version\xc2\x85='1.0'?>\r\n"
parser = expat.ParserCreate()
try:
parser.Parse(xml, True)
self.fail()
except expat.ExpatError as e:
self.assertEquals(str(e), 'XML declaration not well-formed: line 1, column 14')
def test_main():
run_unittest(SetAttributeTest,
@ -569,7 +587,8 @@ def test_main():
HandlerExceptionTest,
PositionTest,
sf1296433Test,
ChardataBufferTest)
ChardataBufferTest,
MalformedInputText)
if __name__ == "__main__":
test_main()

View File

@ -183,6 +183,7 @@ Ismail Donmez
Dima Dorfman
Cesar Douady
Dean Draayer
Fred L. Drake, Jr.
John DuBois
Paul Dubois
Graham Dumpleton
@ -371,7 +372,6 @@ Irmen de Jong
Lucas de Jonge
John Jorgensen
Jens B. Jorgensen
Fred L. Drake, Jr.
Andreas Jung
Tattoo Mabonzo K.
Bob Kahn
@ -408,6 +408,7 @@ Holger Krekel
Michael Kremer
Fabian Kreutz
Hannu Krosing
Ivan Krstić
Andrew Kuchling
Vladimir Kushnir
Cameron Laird

View File

@ -1227,6 +1227,9 @@ C-API
Extension Modules
-----------------
- Fix a segfault in expat that occurred when specially crafted input led to
the tokenizer not stopping.
- Issue #6561: '\d' in a regex now matches only characters with
Unicode category 'Nd' (Number, Decimal Digit). Previously it also
matched characters with category 'No'.

View File

@ -1741,7 +1741,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
const char *end,
POSITION *pos)
{
while (ptr != end) {
while (ptr < end) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \