#15927: Fix csv.reader parsing of escaped \r\n with quoting off.
This fix means that such values are correctly roundtripped, since csv.writer already does the correct escaping. Patch by Michael Johnson.
This commit is contained in:
parent
04cbe0c35b
commit
c7c42efb16
|
@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase):
|
|||
for i, row in enumerate(csv.reader(fileobj)):
|
||||
self.assertEqual(row, rows[i])
|
||||
|
||||
def test_roundtrip_escaped_unquoted_newlines(self):
|
||||
with TemporaryFile("w+", newline='') as fileobj:
|
||||
writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
|
||||
rows = [['a\nb','b'],['c','x\r\nd']]
|
||||
writer.writerows(rows)
|
||||
fileobj.seek(0)
|
||||
for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
|
||||
self.assertEqual(row,rows[i])
|
||||
|
||||
class TestDialectRegistry(unittest.TestCase):
|
||||
def test_registry_badargs(self):
|
||||
self.assertRaises(TypeError, csv.list_dialects, None)
|
||||
|
|
|
@ -591,6 +591,7 @@ Orjan Johansen
|
|||
Fredrik Johansson
|
||||
Gregory K. Johnson
|
||||
Kent Johnson
|
||||
Michael Johnson
|
||||
Simon Johnston
|
||||
Matt Joiner
|
||||
Thomas Jollans
|
||||
|
|
|
@ -289,6 +289,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #15927: CSV now correctly parses escaped newlines and carriage returns
|
||||
when parsing with quoting turned off.
|
||||
|
||||
- Issue #17467: add readline and readlines support to mock_open in
|
||||
unittest.mock.
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule;
|
|||
typedef enum {
|
||||
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
|
||||
IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
|
||||
EAT_CRNL
|
||||
EAT_CRNL,AFTER_ESCAPED_CRNL
|
||||
} ParserState;
|
||||
|
||||
typedef enum {
|
||||
|
@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
|
|||
break;
|
||||
|
||||
case ESCAPED_CHAR:
|
||||
if (c == '\n' | c=='\r') {
|
||||
if (parse_add_char(self, c) < 0)
|
||||
return -1;
|
||||
self->state = AFTER_ESCAPED_CRNL;
|
||||
break;
|
||||
}
|
||||
if (c == '\0')
|
||||
c = '\n';
|
||||
if (parse_add_char(self, c) < 0)
|
||||
|
@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
|
|||
self->state = IN_FIELD;
|
||||
break;
|
||||
|
||||
case AFTER_ESCAPED_CRNL:
|
||||
if (c == '\0')
|
||||
break;
|
||||
/*fallthru*/
|
||||
|
||||
case IN_FIELD:
|
||||
/* in unquoted field */
|
||||
if (c == '\n' || c == '\r' || c == '\0') {
|
||||
|
|
Loading…
Reference in New Issue