Expat could crash when given malformed input, because it would never stop its
tokenizing step. Thanks to Ivan Krstić for the patch.
This commit is contained in:
parent
c4ad0345cf
commit
764465f315
|
@ -559,6 +559,24 @@ class ChardataBufferTest(unittest.TestCase):
|
|||
parser.Parse(xml2, 1)
|
||||
self.assertEquals(self.n, 4)
|
||||
|
||||
class MalformedInputText(unittest.TestCase):
|
||||
def test1(self):
|
||||
xml = "\0\r\n"
|
||||
parser = expat.ParserCreate()
|
||||
try:
|
||||
parser.Parse(xml, True)
|
||||
self.fail()
|
||||
except expat.ExpatError as e:
|
||||
self.assertEquals(str(e), 'no element found: line 2, column 1')
|
||||
|
||||
def test2(self):
|
||||
xml = "<?xml version\xc2\x85='1.0'?>\r\n"
|
||||
parser = expat.ParserCreate()
|
||||
try:
|
||||
parser.Parse(xml, True)
|
||||
self.fail()
|
||||
except expat.ExpatError as e:
|
||||
self.assertEquals(str(e), 'XML declaration not well-formed: line 1, column 14')
|
||||
|
||||
def test_main():
|
||||
run_unittest(SetAttributeTest,
|
||||
|
@ -569,7 +587,8 @@ def test_main():
|
|||
HandlerExceptionTest,
|
||||
PositionTest,
|
||||
sf1296433Test,
|
||||
ChardataBufferTest)
|
||||
ChardataBufferTest,
|
||||
MalformedInputText)
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_main()
|
||||
|
|
|
@ -183,6 +183,7 @@ Ismail Donmez
|
|||
Dima Dorfman
|
||||
Cesar Douady
|
||||
Dean Draayer
|
||||
Fred L. Drake, Jr.
|
||||
John DuBois
|
||||
Paul Dubois
|
||||
Graham Dumpleton
|
||||
|
@ -371,7 +372,6 @@ Irmen de Jong
|
|||
Lucas de Jonge
|
||||
John Jorgensen
|
||||
Jens B. Jorgensen
|
||||
Fred L. Drake, Jr.
|
||||
Andreas Jung
|
||||
Tattoo Mabonzo K.
|
||||
Bob Kahn
|
||||
|
@ -408,6 +408,7 @@ Holger Krekel
|
|||
Michael Kremer
|
||||
Fabian Kreutz
|
||||
Hannu Krosing
|
||||
Ivan Krstić
|
||||
Andrew Kuchling
|
||||
Vladimir Kushnir
|
||||
Cameron Laird
|
||||
|
|
|
@ -1227,6 +1227,9 @@ C-API
|
|||
Extension Modules
|
||||
-----------------
|
||||
|
||||
- Fix a segfault in expat where specially crafted input led to the
|
||||
tokenizer not stopping.
|
||||
|
||||
- Issue #6561: '\d' in a regex now matches only characters with
|
||||
Unicode category 'Nd' (Number, Decimal Digit). Previously it also
|
||||
matched characters with category 'No'.
|
||||
|
|
|
@ -1741,7 +1741,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
|
|||
const char *end,
|
||||
POSITION *pos)
|
||||
{
|
||||
while (ptr != end) {
|
||||
while (ptr < end) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
#define LEAD_CASE(n) \
|
||||
case BT_LEAD ## n: \
|
||||
|
|
Loading…
Reference in New Issue