mirror of https://github.com/python/cpython
gh-113732: Fix support of QUOTE_NOTNULL and QUOTE_STRINGS in csv.reader (GH-113738)
This commit is contained in:
parent
58f883b91b
commit
ea30a28c3e
|
@ -690,7 +690,7 @@ csv
|
|||
|
||||
* Add :const:`csv.QUOTE_NOTNULL` and :const:`csv.QUOTE_STRINGS` flags to
|
||||
provide finer-grained control of ``None`` and empty strings by
|
||||
:class:`csv.writer` objects.
|
||||
:class:`~csv.reader` and :class:`~csv.writer` objects.
|
||||
|
||||
dis
|
||||
---
|
||||
|
|
|
@ -392,10 +392,26 @@ class Test_Csv(unittest.TestCase):
|
|||
# will this fail where locale uses comma for decimals?
|
||||
self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
|
||||
quoting=csv.QUOTE_NONNUMERIC)
|
||||
self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']],
|
||||
quoting=csv.QUOTE_NOTNULL)
|
||||
self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]],
|
||||
quoting=csv.QUOTE_STRINGS)
|
||||
|
||||
self._read_test([',,"",'], [['', '', '', '']])
|
||||
self._read_test([',,"",'], [['', '', '', '']],
|
||||
quoting=csv.QUOTE_NONNUMERIC)
|
||||
self._read_test([',,"",'], [[None, None, '', None]],
|
||||
quoting=csv.QUOTE_NOTNULL)
|
||||
self._read_test([',,"",'], [[None, None, '', None]],
|
||||
quoting=csv.QUOTE_STRINGS)
|
||||
|
||||
self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
|
||||
self.assertRaises(ValueError, self._read_test,
|
||||
['abc,3'], [[]],
|
||||
quoting=csv.QUOTE_NONNUMERIC)
|
||||
self.assertRaises(ValueError, self._read_test,
|
||||
['abc,3'], [[]],
|
||||
quoting=csv.QUOTE_STRINGS)
|
||||
self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@')
|
||||
self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0')
|
||||
|
||||
|
@ -403,6 +419,15 @@ class Test_Csv(unittest.TestCase):
|
|||
self._read_test(['no space, space, spaces,\ttab'],
|
||||
[['no space', 'space', 'spaces', '\ttab']],
|
||||
skipinitialspace=True)
|
||||
self._read_test([' , , '],
|
||||
[['', '', '']],
|
||||
skipinitialspace=True)
|
||||
self._read_test([' , , '],
|
||||
[[None, None, None]],
|
||||
skipinitialspace=True, quoting=csv.QUOTE_NOTNULL)
|
||||
self._read_test([' , , '],
|
||||
[[None, None, None]],
|
||||
skipinitialspace=True, quoting=csv.QUOTE_STRINGS)
|
||||
|
||||
def test_read_bigfield(self):
|
||||
# This exercises the buffer realloc functionality and field size
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fix support of :data:`~csv.QUOTE_NOTNULL` and :data:`~csv.QUOTE_STRINGS` in
|
||||
:func:`csv.reader`.
|
|
@ -131,7 +131,7 @@ typedef struct {
|
|||
Py_UCS4 *field; /* temporary buffer */
|
||||
Py_ssize_t field_size; /* size of allocated buffer */
|
||||
Py_ssize_t field_len; /* length of current field */
|
||||
int numeric_field; /* treat field as numeric */
|
||||
bool unquoted_field; /* true if no quotes around the current field */
|
||||
unsigned long line_num; /* Source-file line number */
|
||||
} ReaderObj;
|
||||
|
||||
|
@ -644,22 +644,33 @@ _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
|
|||
static int
|
||||
parse_save_field(ReaderObj *self)
|
||||
{
|
||||
int quoting = self->dialect->quoting;
|
||||
PyObject *field;
|
||||
|
||||
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
(void *) self->field, self->field_len);
|
||||
if (field == NULL)
|
||||
return -1;
|
||||
self->field_len = 0;
|
||||
if (self->numeric_field) {
|
||||
PyObject *tmp;
|
||||
|
||||
self->numeric_field = 0;
|
||||
tmp = PyNumber_Float(field);
|
||||
Py_DECREF(field);
|
||||
if (tmp == NULL)
|
||||
if (self->unquoted_field &&
|
||||
self->field_len == 0 &&
|
||||
(quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
|
||||
{
|
||||
field = Py_NewRef(Py_None);
|
||||
}
|
||||
else {
|
||||
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
(void *) self->field, self->field_len);
|
||||
if (field == NULL) {
|
||||
return -1;
|
||||
field = tmp;
|
||||
}
|
||||
if (self->unquoted_field &&
|
||||
self->field_len != 0 &&
|
||||
(quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
|
||||
{
|
||||
PyObject *tmp = PyNumber_Float(field);
|
||||
Py_DECREF(field);
|
||||
if (tmp == NULL) {
|
||||
return -1;
|
||||
}
|
||||
field = tmp;
|
||||
}
|
||||
self->field_len = 0;
|
||||
}
|
||||
if (PyList_Append(self->fields, field) < 0) {
|
||||
Py_DECREF(field);
|
||||
|
@ -721,6 +732,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
|
|||
/* fallthru */
|
||||
case START_FIELD:
|
||||
/* expecting field */
|
||||
self->unquoted_field = true;
|
||||
if (c == '\n' || c == '\r' || c == EOL) {
|
||||
/* save empty field - return [fields] */
|
||||
if (parse_save_field(self) < 0)
|
||||
|
@ -730,10 +742,12 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
|
|||
else if (c == dialect->quotechar &&
|
||||
dialect->quoting != QUOTE_NONE) {
|
||||
/* start quoted field */
|
||||
self->unquoted_field = false;
|
||||
self->state = IN_QUOTED_FIELD;
|
||||
}
|
||||
else if (c == dialect->escapechar) {
|
||||
/* possible escaped character */
|
||||
self->unquoted_field = false;
|
||||
self->state = ESCAPED_CHAR;
|
||||
}
|
||||
else if (c == ' ' && dialect->skipinitialspace)
|
||||
|
@ -746,8 +760,6 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
|
|||
}
|
||||
else {
|
||||
/* begin new unquoted field */
|
||||
if (dialect->quoting == QUOTE_NONNUMERIC)
|
||||
self->numeric_field = 1;
|
||||
if (parse_add_char(self, module_state, c) < 0)
|
||||
return -1;
|
||||
self->state = IN_FIELD;
|
||||
|
@ -892,7 +904,7 @@ parse_reset(ReaderObj *self)
|
|||
return -1;
|
||||
self->field_len = 0;
|
||||
self->state = START_RECORD;
|
||||
self->numeric_field = 0;
|
||||
self->unquoted_field = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue