bpo-21872: fix lzma library decompresses data incompletely (GH-14048)

* 1. add test case with wrong behavior
* 2. fix bug when max_length == -1
* 3. allow b"" as valid input data for decompress_buf()
* 4. when max_length >= 0, let needs_input mechanism works
* add more asserts to test case
This commit is contained in:
animalize 2019-09-12 22:20:37 +08:00 committed by Gregory P. Smith
parent 2f1b857562
commit 4ffd05d7ec
3 changed files with 188 additions and 6 deletions

View File

@ -1197,6 +1197,36 @@ class FileTestCase(unittest.TestCase):
f.close()
self.assertRaises(ValueError, f.tell)
def test_issue21872(self):
# sometimes decompress data incompletely
# ---------------------
# when max_length == -1
# ---------------------
d1 = LZMADecompressor()
entire = d1.decompress(ISSUE_21872_DAT, max_length=-1)
self.assertEqual(len(entire), 13160)
self.assertTrue(d1.eof)
# ---------------------
# when max_length > 0
# ---------------------
d2 = LZMADecompressor()
# When this value of max_length is used, the input and output
# buffers are exhausted at the same time, and lzs's internal
# state still have 11 bytes can be output.
out1 = d2.decompress(ISSUE_21872_DAT, max_length=13149)
self.assertFalse(d2.needs_input) # ensure needs_input mechanism works
self.assertFalse(d2.eof)
# simulate needs_input mechanism
# output internal state's 11 bytes
out2 = d2.decompress(b'')
self.assertEqual(len(out2), 11)
self.assertTrue(d2.eof)
self.assertEqual(out1 + out2, entire)
class OpenTestCase(unittest.TestCase):
@ -1763,6 +1793,139 @@ COMPRESSED_RAW_4 = (
b"\x00"
)
ISSUE_21872_DAT = (
b']\x00\x00@\x00h3\x00\x00\x00\x00\x00\x00\x00\x00`D\x0c\x99\xc8'
b'\xd1\xbbZ^\xc43+\x83\xcd\xf1\xc6g\xec-\x061F\xb1\xbb\xc7\x17%-\xea'
b'\xfap\xfb\x8fs\x128\xb2,\x88\xe4\xc0\x12|*x\xd0\xa2\xc4b\x1b!\x02c'
b'\xab\xd9\x87U\xb8n \xfaVJ\x9a"\xb78\xff%_\x17`?@*\xc2\x82'
b"\xf2^\x1b\xb8\x04&\xc0\xbb\x03g\x9d\xca\xe9\xa4\xc9\xaf'\xe5\x8e}"
b'F\xdd\x11\xf3\x86\xbe\x1fN\x95\\\xef\xa2Mz-\xcb\x9a\xe3O@'
b"\x19\x07\xf6\xee\x9e\x9ag\xc6\xa5w\rnG'\x99\xfd\xfeGI\xb0"
b'\xbb\xf9\xc2\xe1\xff\xc5r\xcf\x85y[\x01\xa1\xbd\xcc/\xa3\x1b\x83\xaa'
b'\xc6\xf9\x99\x0c\xb6_\xc9MQ+x\xa2F\xda]\xdd\xe8\xfb\x1a&'
b',\xc4\x19\x1df\x81\x1e\x90\xf3\xb8Hgr\x85v\xbe\xa3qx\x01Y\xb5\x9fF'
b"\x13\x18\x01\xe69\x9b\xc8'\x1e\x9d\xd6\xe4F\x84\xac\xf8d<\x11\xd5"
b'\\\x0b\xeb\x0e\x82\xab\xb1\xe6\x1fka\xe1i\xc4 C\xb1"4)\xd6\xa7`\x02'
b'\xec\x11\x8c\xf0\x14\xb0\x1d\x1c\xecy\xf0\xb7|\x11j\x85X\xb2!\x1c'
b'\xac\xb5N\xc7\x85j\x9ev\xf5\xe6\x0b\xc1]c\xc15\x16\x9f\xd5\x99'
b"\xfei^\xd2G\x9b\xbdl\xab:\xbe,\xa9'4\x82\xe5\xee\xb3\xc1"
b'$\x93\x95\xa8Y\x16\xf5\xbf\xacw\x91\x04\x1d\x18\x06\xe9\xc5\xfdk\x06'
b'\xe8\xfck\xc5\x86>\x8b~\xa4\xcb\xf1\xb3\x04\xf1\x04G5\xe2\xcc]'
b'\x16\xbf\x140d\x18\xe2\xedw#(3\xca\xa1\x80bX\x7f\xb3\x84'
b'\x9d\xdb\xe7\x08\x97\xcd\x16\xb9\xf1\xd5r+m\x1e\xcb3q\xc5\x9e\x92'
b"\x7f\x8e*\xc7\xde\xe9\xe26\xcds\xb1\x10-\xf6r\x02?\x9d\xddCgJN'"
b'\x11M\xfa\nQ\n\xe6`m\xb8N\xbbq\x8el\x0b\x02\xc7:q\x04G\xa1T'
b'\xf1\xfe!0\x85~\xe5\x884\xe9\x89\xfb\x13J8\x15\xe42\xb6\xad'
b'\x877A\x9a\xa6\xbft]\xd0\xe35M\xb0\x0cK\xc8\xf6\x88\xae\xed\xa9,j7'
b'\x81\x13\xa0(\xcb\xe1\xe9l2\x7f\xcd\xda\x95(\xa70B\xbd\xf4\xe3'
b'hp\x94\xbdJ\xd7\t\xc7g\xffo?\x89?\xf8}\x7f\xbc\x1c\x87'
b'\x14\xc0\xcf\x8cV:\x9a\x0e\xd0\xb2\x1ck\xffk\xb9\xe0=\xc7\x8d/'
b'\xb8\xff\x7f\x1d\x87`\x19.\x98X*~\xa7j\xb9\x0b"\xf4\xe4;V`\xb9\xd7'
b'\x03\x1e\xd0t0\xd3\xde\x1fd\xb9\xe2)\x16\x81}\xb1\\b\x7fJ'
b'\x92\xf4\xff\n+V!\xe9\xde\x98\xa0\x8fK\xdf7\xb9\xc0\x12\x1f\xe2'
b'\xe9\xb0`\xae\x14\r\xa7\xc4\x81~\xd8\x8d\xc5\x06\xd8m\xb0Y\x8a)'
b'\x06/\xbb\xf9\xac\xeaP\xe0\x91\x05m[\xe5z\xe6Z\xf3\x9f\xc7\xd0'
b'\xd3\x8b\xf3\x8a\x1b\xfa\xe4Pf\xbc0\x17\x10\xa9\xd0\x95J{\xb3\xc3'
b'\xfdW\x9bop\x0f\xbe\xaee\xa3]\x93\x9c\xda\xb75<\xf6g!\xcc\xb1\xfc\\'
b'7\x152Mc\x17\x84\x9d\xcd35\r0\xacL-\xf3\xfb\xcb\x96\x1e\xe9U\x7f'
b'\xd7\xca\xb0\xcc\x89\x0c*\xce\x14\xd1P\xf1\x03\xb6.~9o?\xe8'
b'\r\x86\xe0\x92\x87}\xa3\x84\x03P\xe0\xc2\x7f\n;m\x9d\x9e\xb4|'
b'\x8c\x18\xc0#0\xfe3\x07<\xda\xd8\xcf^\xd4Hi\xd6\xb3\x0bT'
b'\x1dF\x88\x85q}\x02\xc6&\xc4\xae\xce\x9cU\xfa\x0f\xcc\xb6\x1f\x11'
b'drw\x9eN\x19\xbd\xffz\x0f\xf0\x04s\xadR\xc1\xc0\xbfl\xf1\xba\xf95^'
b'e\xb1\xfbVY\xd9\x9f\x1c\xbf*\xc4\xa86\x08+\xd6\x88[\xc4_rc\xf0f'
b'\xb8\xd4\xec\x1dx\x19|\xbf\xa7\xe0\x82\x0b\x8c~\x10L/\x90\xd6\xfb'
b'\x81\xdb\x98\xcc\x02\x14\xa5C\xb2\xa7i\xfd\xcd\x1fO\xf7\xe9\x89t\xf0'
b'\x17\xa5\x1c\xad\xfe<Q`%\x075k\n7\x9eI\x82<#)&\x04\xc2\xf0C\xd4`!'
b'\xcb\xa9\xf9\xb3F\x86\xb5\xc3M\xbeu\x12\xb2\xca\x95e\x10\x0b\xb1\xcc'
b'\x01b\x9bXa\x1b[B\x8c\x07\x11Of;\xeaC\xebr\x8eb\xd9\x9c\xe4i]<z\x9a'
b'\x03T\x8b9pF\x10\x8c\x84\xc7\x0e\xeaPw\xe5\xa0\x94\x1f\x84\xdd'
b'a\xe8\x85\xc2\x00\xebq\xe7&Wo5q8\xc2t\x98\xab\xb7\x7f\xe64-H'
b'\t\xb4d\xbe\x06\xe3Q\x8b\xa9J\xb0\x00\xd7s.\x85"\xc0p\x05'
b'\x1c\x06N\x87\xa5\xf8\xc3g\x1b}\x0f\x0f\xc3|\x90\xea\xefd3X'
b'[\xab\x04E\xf2\xf2\xc9\x08\x8a\xa8+W\xa2v\xec\x15G\x08/I<L\\1'
b'\xff\x15O\xaa\x89{\xd1mW\x13\xbd~\xe1\x90^\xc4@\r\xed\xb5D@\xb4\x08'
b'A\x90\xe69;\xc7BO\xdb\xda\xebu\x9e\xa9tN\xae\x8aJ5\xcd\x11\x1d\xea'
b"\xe5\xa7\x04\xe6\x82Z\xc7O\xe46[7\xdco*[\xbe\x0b\xc9\xb7a\xab'\xf6"
b"\xd1u\xdb\xd9q\xf5+y\x1b\x00\xb4\xf3a\xae\xf1M\xc4\xbc\xd00'\x06pQ"
b'\x8dH\xaa\xaa\xc4\xd2K\x9b\xc0\xe9\xec=n\xa9\x1a\x8a\xc2\xe8\x18\xbc'
b'\x93\xb8F\xa1\x8fOY\xe7\xda\xcf0\t\xff|\xd9\xe5\xcf\xe7\xf6\xbe'
b'\xf8\x04\x17\xf2\xe5P\xa7y~\xce\x11h0\x81\x80d[\x00_v\xbbc\xdbI'
b'3\xbc`W\xc0yrkB\xf5\x9f\xe9i\xc5\x8a^\x8d\xd4\x81\xd9\x05\xc1\xfc>'
b'"\xd1v`\x82\xd5$\x89\xcf^\xd52.\xafd\xe8d@\xaa\xd5Y|\x90\x84'
b'j\xdb}\x84riV\x8e\xf0X4rB\xf2NPS[\x8e\x88\xd4\x0fI\xb8'
b'\xdd\xcb\x1d\xf2(\xdf;9\x9e|\xef^0;.*[\x9fl\x7f\xa2_X\xaff!\xbb\x03'
b'\xff\x19\x8f\x88\xb5\xb6\x884\xa3\x05\xde3D{\xe3\xcb\xce\xe4t]'
b'\x875\xe3Uf\xae\xea\x88\x1c\x03b\n\xb1,Q\xec\xcf\x08\t\xde@\x83\xaa<'
b',-\xe4\xee\x9b\x843\xe5\x007\tK\xac\x057\xd6*X\xa3\xc6~\xba\xe6O'
b'\x81kz"\xbe\xe43sL\xf1\xfa;\xf4^\x1e\xb4\x80\xe2\xbd\xaa\x17Z\xe1f'
b'\xda\xa6\xb9\x07:]}\x9fa\x0b?\xba\xe7\xf15\x04M\xe3\n}M\xa4\xcb\r'
b'2\x8a\x88\xa9\xa7\x92\x93\x84\x81Yo\x00\xcc\xc4\xab\x9aT\x96\x0b\xbe'
b'U\xac\x1d\x8d\x1b\x98"\xf8\x8f\xf1u\xc1n\xcc\xfcA\xcc\x90\xb7i'
b'\x83\x9c\x9c~\x1d4\xa2\xf0*J\xe7t\x12\xb4\xe3\xa0u\xd7\x95Z'
b'\xf7\xafG\x96~ST,\xa7\rC\x06\xf4\xf0\xeb`2\x9e>Q\x0e\xf6\xf5\xc5'
b'\x9b\xb5\xaf\xbe\xa3\x8f\xc0\xa3hu\x14\x12 \x97\x99\x04b\x8e\xc7\x1b'
b'VKc\xc1\xf3 \xde\x85-:\xdc\x1f\xac\xce*\x06\xb3\x80;`'
b'\xdb\xdd\x97\xfdg\xbf\xe7\xa8S\x08}\xf55e7\xb8/\xf0!\xc8'
b"Y\xa8\x9a\x07'\xe2\xde\r\x02\xe1\xb2\x0c\xf4C\xcd\xf9\xcb(\xe8\x90"
b'\xd3bTD\x15_\xf6\xc3\xfb\xb3E\xfc\xd6\x98{\xc6\\fz\x81\xa99\x85\xcb'
b'\xa5\xb1\x1d\x94bqW\x1a!;z~\x18\x88\xe8i\xdb\x1b\x8d\x8d'
b'\x06\xaa\x0e\x99s+5k\x00\xe4\xffh\xfe\xdbt\xa6\x1bU\xde\xa3'
b'\xef\xcb\x86\x9e\x81\x16j\n\x9d\xbc\xbbC\x80?\x010\xc7Jj;'
b'\xc4\xe5\x86\xd5\x0e0d#\xc6;\xb8\xd1\xc7c\xb5&8?\xd9J\xe5\xden\xb9'
b'\xe9cb4\xbb\xe6\x14\xe0\xe7l\x1b\x85\x94\x1fh\xf1n\xdeZ\xbe'
b'\x88\xff\xc2e\xca\xdc,B-\x8ac\xc9\xdf\xf5|&\xe4LL\xf0\x1f\xaa8\xbd'
b'\xc26\x94bVi\xd3\x0c\x1c\xb6\xbb\x99F\x8f\x0e\xcc\x8e4\xc6/^W\xf5?'
b'\xdc\x84(\x14dO\x9aD6\x0f4\xa3,\x0c\x0bS\x9fJ\xe1\xacc^\x8a0\t\x80D['
b'\xb8\xe6\x86\xb0\xe8\xd4\xf9\x1en\xf1\xf5^\xeb\xb8\xb8\xf8'
b')\xa8\xbf\xaa\x84\x86\xb1a \x95\x16\x08\x1c\xbb@\xbd+\r/\xfb'
b'\x92\xfbh\xf1\x8d3\xf9\x92\xde`\xf1\x86\x03\xaa+\xd9\xd9\xc6P\xaf'
b'\xe3-\xea\xa5\x0fB\xca\xde\xd5n^\xe3/\xbf\xa6w\xc8\x0e<M'
b'\xc2\x1e!\xd4\xc6E\xf2\xad\x0c\xbc\x1d\x88Y\x03\x98<\x92\xd9\xa6B'
b'\xc7\x83\xb5"\x97D|&\xc4\xd4\xfad\x0e\xde\x06\xa3\xc2\x9c`\xf2'
b'7\x03\x1a\xed\xd80\x10\xe9\x0co\x10\xcf\x18\x16\xa7\x1c'
b"\xe5\x96\xa4\xd9\xe1\xa5v;]\xb7\xa9\xdc'hA\xe3\x9c&\x98\x0b9\xdf~@"
b'\xf8\xact\x87<\xf94\x0c\x9d\x93\xb0)\xe1\xa2\x0f\x1e=:&\xd56\xa5A+'
b'\xab\xc4\x00\x8d\x81\x93\xd4\xd8<\x82k\\d\xd8v\xab\xbd^l5C?\xd4\xa0'
b'M\x12C\xc8\x80\r\xc83\xe8\xc0\xf5\xdf\xca\x05\xf4BPjy\xbe\x91\x9bzE'
b"\xd8[\x93oT\r\x13\x16'\x1a\xbd*H\xd6\xfe\r\xf3\x91M\x8b\xee\x8f7f"
b"\x0b;\xaa\x85\xf2\xdd'\x0fwM \xbd\x13\xb9\xe5\xb8\xb7 D+P\x1c\xe4g"
b'n\xd2\xf1kc\x15\xaf\xc6\x90V\x03\xc2UovfZ\xcc\xd23^\xb3\xe7\xbf'
b'\xacv\x1d\x82\xedx\xa3J\xa9\xb7\xcf\x0c\xe6j\x96n*o\x18>'
b'\xc6\xfd\x97_+D{\x03\x15\xe8s\xb1\xc8HAG\xcf\xf4\x1a\xdd'
b'\xad\x11\xbf\x157q+\xdeW\x89g"X\x82\xfd~\xf7\xab4\xf6`\xab\xf1q'
b')\x82\x10K\xe9sV\xfe\xe45\xafs*\x14\xa7;\xac{\x06\x9d<@\x93G'
b'j\x1d\xefL\xe9\xd8\x92\x19&\xa1\x16\x19\x04\tu5\x01]\xf6\xf4'
b'\xcd\\\xd8A|I\xd4\xeb\x05\x88C\xc6e\xacQ\xe9*\x97~\x9au\xf8Xy'
b"\x17P\x10\x9f\n\x8c\xe2fZEu>\x9b\x1e\x91\x0b'`\xbd\xc0\xa8\x86c\x1d"
b'Z\xe2\xdc8j\x95\xffU\x90\x1e\xf4o\xbc\xe5\xe3e>\xd2R\xc0b#\xbc\x15'
b'H-\xb9!\xde\x9d\x90k\xdew\x9b{\x99\xde\xf7/K)A\xfd\xf5\xe6:\xda'
b'UM\xcc\xbb\xa2\x0b\x9a\x93\xf5{9\xc0 \xd2((6i\xc0\xbbu\xd8\x9e\x8d'
b'\xf8\x04q\x10\xd4\x14\x9e7-\xb9B\xea\x01Q8\xc8v\x9a\x12A\x88Cd\x92'
b"\x1c\x8c!\xf4\x94\x87'\xe3\xcd\xae\xf7\xd8\x93\xfa\xde\xa8b\x9e\xee2"
b'K\xdb\x00l\x9d\t\xb1|D\x05U\xbb\xf4>\xf1w\x887\xd1}W\x9d|g|1\xb0\x13'
b"\xa3 \xe5\xbfm@\xc06+\xb7\t\xcf\x15D\x9a \x1fM\x1f\xd2\xb5'\xa9\xbb"
b'~Co\x82\xfa\xc2\t\xe6f\xfc\xbeI\xae1\x8e\xbe\xb8\xcf\x86\x17'
b'\x9f\xe2`\xbd\xaf\xba\xb9\xbc\x1b\xa3\xcd\x82\x8fwc\xefd\xa9\xd5\x14'
b'\xe2C\xafUE\xb6\x11MJH\xd0=\x05\xd4*I\xff"\r\x1b^\xcaS6=\xec@\xd5'
b'\x11,\xe0\x87Gr\xaa[\xb8\xbc>n\xbd\x81\x0c\x07<\xe9\x92('
b'\xb2\xff\xac}\xe7\xb6\x15\x90\x9f~4\x9a\xe6\xd6\xd8s\xed\x99tf'
b'\xa0f\xf8\xf1\x87\t\x96/)\x85\xb6\n\xd7\xb2w\x0b\xbc\xba\x99\xee'
b'Q\xeen\x1d\xad\x03\xc3s\xd1\xfd\xa2\xc6\xb7\x9a\x9c(G<6\xad[~H '
b'\x16\x89\x89\xd0\xc3\xd2\xca~\xac\xea\xa5\xed\xe5\xfb\r:'
b'\x8e\xa6\xf1e\xbb\xba\xbd\xe0(\xa3\x89_\x01(\xb5c\xcc\x9f\x1fg'
b'v\xfd\x17\xb3\x08S=S\xee\xfc\x85>\x91\x8d\x8d\nYR\xb3G\xd1A\xa2\xb1'
b'\xec\xb0\x01\xd2\xcd\xf9\xfe\x82\x06O\xb3\xecd\xa9c\xe0\x8eP\x90\xce'
b'\xe0\xcd\xd8\xd8\xdc\x9f\xaa\x01"[Q~\xe4\x88\xa1#\xc1\x12C\xcf'
b'\xbe\x80\x11H\xbf\x86\xd8\xbem\xcfWFQ(X\x01DK\xdfB\xaa\x10.-'
b'\xd5\x9e|\x86\x15\x86N]\xc7Z\x17\xcd=\xd7)M\xde\x15\xa4LTi\xa0\x15'
b'\xd1\xe7\xbdN\xa4?\xd1\xe7\x02\xfe4\xe4O\x89\x98&\x96\x0f\x02\x9c'
b'\x9e\x19\xaa\x13u7\xbd0\xdc\xd8\x93\xf4BNE\x1d\x93\x82\x81\x16'
b'\xe5y\xcf\x98D\xca\x9a\xe2\xfd\xcdL\xcc\xd1\xfc_\x0b\x1c\xa0]\xdc'
b'\xa91 \xc9c\xd8\xbf\x97\xcfp\xe6\x19-\xad\xff\xcc\xd1N(\xe8'
b'\xeb#\x182\x96I\xf7l\xf3r\x00'
)
def test_main():
run_unittest(

View File

@ -0,0 +1,3 @@
Fix :mod:`lzma`: module decompresses data incompletely. When decompressing a
FORMAT_ALONE format file, and it doesn't have the end marker, sometimes the
last one to dozens bytes can't be output. Patch by Ma Lin.

View File

@ -872,9 +872,6 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
PyObject *result;
lzma_stream *lzs = &d->lzs;
if (lzs->avail_in == 0)
return PyBytes_FromStringAndSize(NULL, 0);
if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
else
@ -891,7 +888,10 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
Py_BEGIN_ALLOW_THREADS
lzret = lzma_code(lzs, LZMA_RUN);
data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0)
lzret = LZMA_OK; /* That wasn't a real error */
Py_END_ALLOW_THREADS
if (catch_lzma_error(lzret))
goto error;
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
@ -899,15 +899,19 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
if (lzret == LZMA_STREAM_END) {
d->eof = 1;
break;
} else if (lzs->avail_in == 0) {
break;
} else if (lzs->avail_out == 0) {
/* Need to check lzs->avail_out before lzs->avail_in.
Maybe lzs's internal state still have a few bytes
can be output, grow the output buffer and continue
if max_lengh < 0. */
if (data_size == max_length)
break;
if (grow_buffer(&result, max_length) == -1)
goto error;
lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
} else if (lzs->avail_in == 0) {
break;
}
}
if (data_size != PyBytes_GET_SIZE(result))
@ -990,7 +994,19 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
}
else if (lzs->avail_in == 0) {
lzs->next_in = NULL;
d->needs_input = 1;
if (lzs->avail_out == 0) {
/* (avail_in==0 && avail_out==0)
Maybe lzs's internal state still have a few bytes can
be output, try to output them next time. */
d->needs_input = 0;
/* if max_length < 0, lzs->avail_out always > 0 */
assert(max_length >= 0);
} else {
/* Input buffer exhausted, output buffer has space. */
d->needs_input = 1;
}
}
else {
d->needs_input = 0;