gh-99581: Fix a buffer overflow in the tokenizer when copying lines that fill the available buffer (#99605)

2022-11-20 20:20:03 +00:00 · 2022-11-20 20:20:03 +00:00 · e13d1d9dda
parent abf5b6ff43
commit e13d1d9dda
3 changed files with 25 additions and 1 deletions
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@ -10,6 +10,8 @@ from textwrap import dedent
 from unittest import TestCase, mock
 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
 from test.support import os_helper
 from test.support.script_helper import run_test_script, make_script
 import os
 import token
@ -2631,5 +2633,19 @@ async def f():
        self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
 # Regression tests for buffer-size handling in the C tokenizer.
 class CTokenizerBufferTests(unittest.TestCase):
    def test_newline_at_the_end_of_buffer(self):
        # See issue 99581: Make sure that if we need to add a new line at the
        # end of the buffer, we have enough space in the buffer, especially
        # when the current line is as long as the buffer space available.
        #
        # The script consists of three comment lines: an encoding declaration
        # followed by two very long lines (10000 and 10002 "a" characters).
        # The final line deliberately has no trailing newline, so the
        # tokenizer has to append one itself.
        # NOTE(review): the latin-1 cookie presumably forces the tokenizer
        # through its recoding read path -- confirm against the tokenizer.
        test_script = f"""\
        #coding: latin-1
        #{"a"*10000}
        #{"a"*10002}"""
        # Write the script into a temporary directory and run it.  There are
        # no explicit assertions here; presumably run_test_script fails the
        # test if the script does not run cleanly -- TODO confirm.
        with os_helper.temp_dir() as temp_dir:
            file_name = make_script(temp_dir, 'foo', test_script)
            run_test_script(file_name)
 if __name__ == "__main__":
    unittest.main()
--- a/Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst
+++ b/Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst
@ -0,0 +1,3 @@
 Fixed a bug that was causing a buffer overflow if the tokenizer copies a
 line missing the newline character from a file that is as long as the
 available tokenizer buffer. Patch by Pablo Galindo.
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@ -413,7 +413,11 @@ tok_readline_recode(struct tok_state *tok) {
        error_ret(tok);
        goto error;
    }
-    if (!tok_reserve_buf(tok, buflen + 1)) {
+    // Make room for the null terminator *and* potentially
    // an extra newline character that we may need to artificially
    // add.
    size_t buffer_size = buflen + 2;
    if (!tok_reserve_buf(tok, buffer_size)) {
        goto error;
    }
    memcpy(tok->inp, buf, buflen);
@ -1000,6 +1004,7 @@ tok_underflow_file(struct tok_state *tok) {
        return 0;
    }
    if (tok->inp[-1] != '\n') {
        assert(tok->inp + 1 < tok->end);
        /* Last line does not end in \n, fake one */
        *tok->inp++ = '\n';
        *tok->inp = '\0';