#15927: Fix csv.reader parsing of escaped \r\n with quoting off.

This fix means that such values are correctly roundtripped, since
csv.writer already does the correct escaping.

Patch by Michael Johnson.
This commit is contained in:
R David Murray 2013-03-19 22:41:47 -04:00
parent 04cbe0c35b
commit c7c42efb16
4 changed files with 25 additions and 1 deletions

View File

@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase):
for i, row in enumerate(csv.reader(fileobj)):
self.assertEqual(row, rows[i])
def test_roundtrip_escaped_unquoted_newlines(self):
    """Escaped CR/LF in unquoted fields must survive a write/read cycle.

    Rows containing embedded ``\\n`` and ``\\r\\n`` are written with
    ``QUOTE_NONE`` and a backslash escape character, then read back with
    the same settings; the reader must return the original field values.
    """
    rows = [['a\nb', 'b'], ['c', 'x\r\nd']]
    dialect = dict(quoting=csv.QUOTE_NONE, escapechar="\\")
    # newline='' keeps the file layer from translating line endings.
    with TemporaryFile("w+", newline='') as fileobj:
        csv.writer(fileobj, **dialect).writerows(rows)
        fileobj.seek(0)
        # Compare the whole list: a per-row enumerate() loop would pass
        # silently if the reader produced too few rows (or none at all).
        self.assertEqual(list(csv.reader(fileobj, **dialect)), rows)
class TestDialectRegistry(unittest.TestCase):
def test_registry_badargs(self):
self.assertRaises(TypeError, csv.list_dialects, None)

View File

@ -591,6 +591,7 @@ Orjan Johansen
Fredrik Johansson
Gregory K. Johnson
Kent Johnson
Michael Johnson
Simon Johnston
Matt Joiner
Thomas Jollans

View File

@ -289,6 +289,9 @@ Core and Builtins
Library
-------
- Issue #15927: CSV now correctly parses escaped newlines and carriage
returns when parsing with quoting turned off.
- Issue #17467: add readline and readlines support to mock_open in
unittest.mock.

View File

@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule;
/* States of the reader's per-character parser; parse_process_char()
 * stores the current value in self->state.
 * NOTE(review): the two consecutive EAT_CRNL lines below look like the
 * removed and added sides of a diff hunk shown without +/- markers --
 * confirm; only the second (the one adding AFTER_ESCAPED_CRNL) should
 * remain in the real source. */
typedef enum {
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
EAT_CRNL
/* AFTER_ESCAPED_CRNL: entered from ESCAPED_CHAR once an escaped '\n' or
 * '\r' has been appended to the current field. */
EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
typedef enum {
@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
break;
case ESCAPED_CHAR:
/* An escaped CR or LF is kept as field data; the state change records
 * that the character just stored was an escaped line break.  Use the
 * logical OR (||), not the bitwise |, for the boolean test. */
if (c == '\n' || c == '\r') {
    if (parse_add_char(self, c) < 0)
        return -1;
    self->state = AFTER_ESCAPED_CRNL;
    break;
}
if (c == '\0')
c = '\n';
if (parse_add_char(self, c) < 0)
@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
self->state = IN_FIELD;
break;
case AFTER_ESCAPED_CRNL:
if (c == '\0')
break;
/*fallthru*/
case IN_FIELD:
/* in unquoted field */
if (c == '\n' || c == '\r' || c == '\0') {