From b4a0417e9112126070316d21cb1f54a7c365a24c Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Thu, 20 Mar 2003 23:29:12 +0000 Subject: [PATCH] new CSV file processing module - see PEP 305 --- Doc/lib/libcsv.tex | 281 ++++++++ Lib/csv/__init__.py | 1 + Lib/csv/csv.py | 138 ++++ Lib/test/test_csv.py | 619 ++++++++++++++++++ Modules/_csv.c | 1465 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 2504 insertions(+) create mode 100644 Doc/lib/libcsv.tex create mode 100644 Lib/csv/__init__.py create mode 100644 Lib/csv/csv.py create mode 100644 Lib/test/test_csv.py create mode 100644 Modules/_csv.c diff --git a/Doc/lib/libcsv.tex b/Doc/lib/libcsv.tex new file mode 100644 index 00000000000..283e4015213 --- /dev/null +++ b/Doc/lib/libcsv.tex @@ -0,0 +1,281 @@ +\section{\module{csv} --- CSV File Reading and Writing} + +\declaremodule{standard}{csv} +\modulesynopsis{Write and read tabular data to and from delimited files.} + +\versionadded{2.3} +\index{csv} +\indexii{data}{tabular} + +The so-called CSV (Comma Separated Values) format is the most common import +and export format for spreadsheets and databases. There is no ``CSV +standard'', so the format is operationally defined by the many applications +which read and write it. The lack of a standard means that subtle +differences often exist in the data produced and consumed by different +applications. These differences can make it annoying to process CSV files +from multiple sources. Still, while the delimiters and quoting characters +vary, the overall format is similar enough that it is possible to write a +single module which can efficiently manipulate such data, hiding the details +of reading and writing the data from the programmer. + +The \module{csv} module implements classes to read and write tabular data in +CSV format. 
It allows programmers to say, ``write this data in the format +preferred by Excel,'' or ``read data from this file which was generated by +Excel,'' without knowing the precise details of the CSV format used by +Excel. Programmers can also describe the CSV formats understood by other +applications or define their own special-purpose CSV formats. + +The \module{csv} module's \class{reader} and \class{writer} objects read and +write sequences. Programmers can also read and write data in dictionary +form using the \class{DictReader} and \class{DictWriter} classes. + +\note{The first version of the \module{csv} module doesn't support Unicode +input. Also, there are currently some issues regarding \ASCII{} NUL +characters. Accordingly, all input should generally be plain \ASCII{} to be +safe. These restrictions will be removed in the future.} + +\begin{seealso} +% \seemodule{array}{Arrays of uniformly types numeric values.} + \seepep{305}{CSV File API} + {The Python Enhancement Proposal which proposed this addition + to Python.} +\end{seealso} + + +\subsection{Module Contents} + + +The \module{csv} module defines the following functions: + +\begin{funcdesc}{reader}{csvfile\optional{, + dialect=\code{'excel'}\optional{, fmtparam}}} +Return a reader object which will iterate over lines in the given +{}\var{csvfile}. \var{csvfile} can be any object which supports the +iterator protocol and returns a string each time its \method{next} +method is called. An optional \var{dialect} parameter can be given +which is used to define a set of parameters specific to a particular CSV +dialect. It may be an instance of a subclass of the \class{Dialect} +class or one of the strings returned by the \function{list_dialects} +function. The other optional {}\var{fmtparam} keyword arguments can be +given to override individual formatting parameters in the current +dialect. 
For more information about the dialect and formatting +parameters, see section~\ref{fmt-params}, ``Dialects and Formatting +Parameters''. + +All data read are returned as strings. No automatic data type +conversion is performed. +\end{funcdesc} + +\begin{funcdesc}{writer}{csvfile\optional{, + dialect=\code{'excel'}\optional{, fmtparam}}} +Return a writer object responsible for converting the user's data into +delimited strings on the given file-like object. An optional +{}\var{dialect} parameter can be given which is used to define a set of +parameters specific to a particular CSV dialect. It may be an instance +of a subclass of the \class{Dialect} class or one of the strings +returned by the \function{list_dialects} function. The other optional +{}\var{fmtparam} keyword arguments can be given to override individual +formatting parameters in the current dialect. For more information +about the dialect and formatting parameters, see +section~\ref{fmt-params}, ``Dialects and Formatting Parameters''. +To make it as easy as possible to +interface with modules which implement the DB API, the value +\constant{None} is written as the empty string. While this isn't a +reversible transformation, it makes it easier to dump SQL NULL data values +to CSV files without preprocessing the data returned from a +\code{cursor.fetch*()} call. All other non-string data are stringified +with \function{str()} before being written. +\end{funcdesc} + +\begin{funcdesc}{register_dialect}{name, dialect} +Associate \var{dialect} with \var{name}. \var{dialect} must be a subclass +of \class{csv.Dialect}. \var{name} must be a string or Unicode object. +\end{funcdesc} + +\begin{funcdesc}{unregister_dialect}{name} +Delete the dialect associated with \var{name} from the dialect registry. An +\exception{Error} is raised if \var{name} is not a registered dialect +name. 
+\end{funcdesc} + +\begin{funcdesc}{get_dialect}{name} +Return the dialect associated with \var{name}. An \exception{Error} is +raised if \var{name} is not a registered dialect name. +\end{funcdesc} + +\begin{funcdesc}{list_dialects}{} +Return the names of all registered dialects. +\end{funcdesc} + + +The \module{csv} module defines the following classes: + +\begin{classdesc}{DictReader}{csvfile, fieldnames\optional{, + restkey=\code{None}\optional{, + restval=\code{None}\optional{, + dialect=\code{'excel'}\optional{, + fmtparam}}}}} +Create an object which operates like a regular reader but maps the +information read into a dict whose keys are given by the \var{fieldnames} +parameter. If the row read has fewer fields than the fieldnames sequence, +the value of \var{restval} will be used as the default value. If the row +read has more fields than the fieldnames sequence, the remaining data is +added as a sequence keyed by the value of \var{restkey}. If the row read +has fewer fields than the fieldnames sequence, the remaining keys take the +value of the optional \var{restval} parameter. All other parameters are +interpreted as for regular readers. +\end{classdesc} + + +\begin{classdesc}{DictWriter}{csvfile, fieldnames\optional{, + restval=""\optional{, + extrasaction=\code{'raise'}\optional{, + dialect=\code{'excel'}\optional{, fmtparam}}}}} +Create an object which operates like a regular writer but maps dictionaries +onto output rows. The \var{fieldnames} parameter identifies the order in +which values in the dictionary passed to the \method{writerow()} method are +written to the \var{csvfile}. The optional \var{restval} parameter +specifies the value to be written if the dictionary is missing a key in +\var{fieldnames}. If the dictionary passed to the \method{writerow()} +method contains a key not found in \var{fieldnames}, the optional +\var{extrasaction} parameter indicates what action to take. If it is set +to \code{'raise'} a \exception{ValueError} is raised. 
If it is set to +\code{'ignore'}, extra values in the dictionary are ignored. All other +parameters are interpreted as for regular writers. +\end{classdesc} + + +\begin{classdesc*}{Dialect}{} +The \class{Dialect} class is a container class relied on primarily for its +attributes, which are used to define the parameters for a specific +\class{reader} or \class{writer} instance. Dialect objects support the +following data attributes: + +\begin{memberdesc}[string]{delimiter} +A one-character string used to separate fields. It defaults to \code{","}. +\end{memberdesc} + +\begin{memberdesc}[boolean]{doublequote} +Controls how instances of \var{quotechar} appearing inside a field should +themselves be quoted. When \constant{True}, the character is doubled. +When \constant{False}, the \var{escapechar} must be a one-character string +which is used as a prefix to the \var{quotechar}. It defaults to +\constant{True}. +\end{memberdesc} + +\begin{memberdesc}{escapechar} +A one-character string used to escape the \var{delimiter} if \var{quoting} +is set to \constant{QUOTE_NONE}. It defaults to \constant{None}. +\end{memberdesc} + +\begin{memberdesc}[string]{lineterminator} +The string used to terminate lines in the CSV file. It defaults to +\code{"\e r\e n"}. +\end{memberdesc} + +\begin{memberdesc}[string]{quotechar} +A one-character string used to quote elements containing the \var{delimiter} +or which start with the \var{quotechar}. It defaults to \code{'"'}. +\end{memberdesc} + +\begin{memberdesc}[integer]{quoting} +Controls when quotes should be generated by the writer. It can take on any +of the \code{QUOTE_*} constants defined below and defaults to +\constant{QUOTE_MINIMAL}. +\end{memberdesc} + +\begin{memberdesc}[boolean]{skipinitialspace} +When \constant{True}, whitespace immediately following the \var{delimiter} +is ignored. The default is \constant{False}. 
+\end{memberdesc} + +\end{classdesc*} + +The \module{csv} module defines the following constants: + +\begin{datadesc}{QUOTE_ALL} +Instructs \class{writer} objects to quote all fields. +\end{datadesc} + +\begin{datadesc}{QUOTE_MINIMAL} +Instructs \class{writer} objects to only quote those fields which contain +the current \var{delimiter} or begin with the current \var{quotechar}. +\end{datadesc} + +\begin{datadesc}{QUOTE_NONNUMERIC} +Instructs \class{writer} objects to quote all non-numeric fields. +\end{datadesc} + +\begin{datadesc}{QUOTE_NONE} +Instructs \class{writer} objects to never quote fields. When the current +\var{delimiter} occurs in output data it is preceded by the current +\var{escapechar} character. When \constant{QUOTE_NONE} is in effect, it +is an error not to have a single-character \var{escapechar} defined, even if +no data to be written contains the \var{delimiter} character. +\end{datadesc} + + +The \module{csv} module defines the following exception: + +\begin{excdesc}{Error} +Raised by any of the functions when an error is detected. +\end{excdesc} + + +\subsection{Dialects and Formatting Parameters\label{fmt-params}} + +To make it easier to specify the format of input and output records, +specific formatting parameters are grouped together into dialects. A +dialect is a subclass of the \class{Dialect} class having a set of specific +methods and a single \method{validate()} method. When creating \class{reader} +or \class{writer} objects, the programmer can specify a string or a subclass +of the \class{Dialect} class as the dialect parameter. In addition to, or +instead of, the \var{dialect} parameter, the programmer can also specify +individual formatting parameters, which have the same names as the +attributes defined above for the \class{Dialect} class. 
+ + +\subsection{Reader Objects} + +\class{DictReader} and \var{reader} objects have the following public +methods: + +\begin{methoddesc}{next}{} +Return the next row of the reader's iterable object as a list, parsed +according to the current dialect. +\end{methoddesc} + + +\subsection{Writer Objects} + +\class{DictWriter} and \var{writer} objects have the following public +methods: + +\begin{methoddesc}{writerow}{row} +Write the \var{row} parameter to the writer's file object, formatted +according to the current dialect. +\end{methoddesc} + +\begin{methoddesc}{writerows}{rows} +Write all the \var{rows} parameters to the writer's file object, formatted +according to the current dialect. +\end{methoddesc} + + +\subsection{Examples} + +The ``Hello, world'' of csv reading is + +\begin{verbatim} + reader = csv.reader(file("some.csv")) + for row in reader: + print row +\end{verbatim} + +The corresponding simplest possible writing example is + +\begin{verbatim} + writer = csv.writer(file("some.csv", "w")) + for row in someiterable: + writer.writerow(row) +\end{verbatim} diff --git a/Lib/csv/__init__.py b/Lib/csv/__init__.py new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/Lib/csv/__init__.py @@ -0,0 +1 @@ + diff --git a/Lib/csv/csv.py b/Lib/csv/csv.py new file mode 100644 index 00000000000..a3ea4e20a63 --- /dev/null +++ b/Lib/csv/csv.py @@ -0,0 +1,138 @@ +from _csv import Error, __version__, writer, reader, register_dialect, \ + unregister_dialect, get_dialect, list_dialects, \ + QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ + __doc__ + +__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", + "Error", "Dialect", "excel", "excel_tab", "reader", "writer", + "register_dialect", "get_dialect", "list_dialects", + "unregister_dialect", "__version__", "DictReader", "DictWriter" ] + +class Dialect: + _name = "" + _valid = False + # placeholders + delimiter = None + quotechar = None + escapechar = None + doublequote = None 
+ skipinitialspace = None + lineterminator = None + quoting = None + + def __init__(self): + if self.__class__ != Dialect: + self._valid = True + errors = self._validate() + if errors != []: + raise Error, "Dialect did not validate: %s" % ", ".join(errors) + + def _validate(self): + errors = [] + if not self._valid: + errors.append("can't directly instantiate Dialect class") + + if self.delimiter is None: + errors.append("delimiter character not set") + elif (not isinstance(self.delimiter, str) or + len(self.delimiter) > 1): + errors.append("delimiter must be one-character string") + + if self.quotechar is None: + if self.quoting != QUOTE_NONE: + errors.append("quotechar not set") + elif (not isinstance(self.quotechar, str) or + len(self.quotechar) > 1): + errors.append("quotechar must be one-character string") + + if self.lineterminator is None: + errors.append("lineterminator not set") + elif not isinstance(self.lineterminator, str): + errors.append("lineterminator must be a string") + + if self.doublequote not in (True, False): + errors.append("doublequote parameter must be True or False") + + if self.skipinitialspace not in (True, False): + errors.append("skipinitialspace parameter must be True or False") + + if self.quoting is None: + errors.append("quoting parameter not set") + + if self.quoting is QUOTE_NONE: + if (not isinstance(self.escapechar, (unicode, str)) or + len(self.escapechar) > 1): + errors.append("escapechar must be a one-character string or unicode object") + + return errors + +class excel(Dialect): + delimiter = ',' + quotechar = '"' + doublequote = True + skipinitialspace = False + lineterminator = '\r\n' + quoting = QUOTE_MINIMAL +register_dialect("excel", excel) + +class excel_tab(excel): + delimiter = '\t' +register_dialect("excel-tab", excel_tab) + + +class DictReader: + def __init__(self, f, fieldnames, restkey=None, restval=None, + dialect="excel", *args): + self.fieldnames = fieldnames # list of keys for the dict + self.restkey = 
restkey # key to catch long rows + self.restval = restval # default value for short rows + self.reader = reader(f, dialect, *args) + + def __iter__(self): + return self + + def next(self): + row = self.reader.next() + # unlike the basic reader, we prefer not to return blanks, + # because we will typically wind up with a dict full of None + # values + while row == []: + row = self.reader.next() + d = dict(zip(self.fieldnames, row)) + lf = len(self.fieldnames) + lr = len(row) + if lf < lr: + d[self.restkey] = row[lf:] + elif lf > lr: + for key in self.fieldnames[lr:]: + d[key] = self.restval + return d + + +class DictWriter: + def __init__(self, f, fieldnames, restval="", extrasaction="raise", + dialect="excel", *args): + self.fieldnames = fieldnames # list of keys for the dict + self.restval = restval # for writing short dicts + if extrasaction.lower() not in ("raise", "ignore"): + raise ValueError, \ + ("extrasaction (%s) must be 'raise' or 'ignore'" % + extrasaction) + self.extrasaction = extrasaction + self.writer = writer(f, dialect, *args) + + def _dict_to_list(self, rowdict): + if self.extrasaction == "raise": + for k in rowdict.keys(): + if k not in self.fieldnames: + raise ValueError, "dict contains fields not in fieldnames" + return [rowdict.get(key, self.restval) for key in self.fieldnames] + + def writerow(self, rowdict): + return self.writer.writerow(self._dict_to_list(rowdict)) + + def writerows(self, rowdicts): + rows = [] + for rowdict in rowdicts: + rows.append(self._dict_to_list(rowdict)) + return self.writer.writerows(rows) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py new file mode 100644 index 00000000000..64456db482f --- /dev/null +++ b/Lib/test/test_csv.py @@ -0,0 +1,619 @@ +# Copyright (C) 2001,2002 Python Software Foundation +# csv package unit tests + +import sys +import unittest +from StringIO import StringIO +from csv import csv +import gc + +class Test_Csv(unittest.TestCase): + """ + Test the underlying C csv parser in ways 
that are not appropriate + from the high level interface. Further tests of this nature are done + in TestDialectRegistry. + """ + def test_reader_arg_valid(self): + self.assertRaises(TypeError, csv.reader) + self.assertRaises(TypeError, csv.reader, None) + self.assertRaises(AttributeError, csv.reader, [], bad_attr = 0) + self.assertRaises(csv.Error, csv.reader, [], 'foo') + class BadClass: + def __init__(self): + raise IOError + self.assertRaises(IOError, csv.reader, [], BadClass) + self.assertRaises(TypeError, csv.reader, [], None) + class BadDialect: + bad_attr = 0 + self.assertRaises(AttributeError, csv.reader, [], BadDialect) + + def test_writer_arg_valid(self): + self.assertRaises(TypeError, csv.writer) + self.assertRaises(TypeError, csv.writer, None) + self.assertRaises(AttributeError, csv.writer, StringIO(), bad_attr = 0) + + def _test_attrs(self, obj): + self.assertEqual(obj.dialect.delimiter, ',') + obj.dialect.delimiter = '\t' + self.assertEqual(obj.dialect.delimiter, '\t') + self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter') + self.assertRaises(TypeError, setattr, obj.dialect, + 'lineterminator', None) + obj.dialect.escapechar = None + self.assertEqual(obj.dialect.escapechar, None) + self.assertRaises(TypeError, delattr, obj.dialect, 'quoting') + self.assertRaises(TypeError, setattr, obj.dialect, 'quoting', None) + obj.dialect.quoting = csv.QUOTE_MINIMAL + self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL) + + def test_reader_attrs(self): + self._test_attrs(csv.reader([])) + + def test_writer_attrs(self): + self._test_attrs(csv.writer(StringIO())) + + def _write_test(self, fields, expect, **kwargs): + fileobj = StringIO() + writer = csv.writer(fileobj, **kwargs) + writer.writerow(fields) + self.assertEqual(fileobj.getvalue(), + expect + writer.dialect.lineterminator) + + def test_write_arg_valid(self): + self.assertRaises(csv.Error, self._write_test, None, '') + self._write_test((), '') + self._write_test([None], '""') + 
self.assertRaises(csv.Error, self._write_test, + [None], None, quoting = csv.QUOTE_NONE) + # Check that exceptions are passed up the chain + class BadList: + def __len__(self): + return 10; + def __getitem__(self, i): + if i > 2: + raise IOError + self.assertRaises(IOError, self._write_test, BadList(), '') + class BadItem: + def __str__(self): + raise IOError + self.assertRaises(IOError, self._write_test, [BadItem()], '') + + def test_write_bigfield(self): + # This exercises the buffer realloc functionality + bigstring = 'X' * 50000 + self._write_test([bigstring,bigstring], '%s,%s' % \ + (bigstring, bigstring)) + + def test_write_quoting(self): + self._write_test(['a','1','p,q'], 'a,1,"p,q"') + self.assertRaises(csv.Error, + self._write_test, + ['a','1','p,q'], 'a,1,"p,q"', + quoting = csv.QUOTE_NONE) + self._write_test(['a','1','p,q'], 'a,1,"p,q"', + quoting = csv.QUOTE_MINIMAL) + self._write_test(['a','1','p,q'], '"a",1,"p,q"', + quoting = csv.QUOTE_NONNUMERIC) + self._write_test(['a','1','p,q'], '"a","1","p,q"', + quoting = csv.QUOTE_ALL) + + def test_write_escape(self): + self._write_test(['a','1','p,q'], 'a,1,"p,q"', + escapechar='\\') +# FAILED - needs to be fixed [am]: +# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"', +# escapechar='\\', doublequote = 0) + self._write_test(['a','1','p,q'], 'a,1,p\\,q', + escapechar='\\', quoting = csv.QUOTE_NONE) + + def test_writerows(self): + class BrokenFile: + def write(self, buf): + raise IOError + writer = csv.writer(BrokenFile()) + self.assertRaises(IOError, writer.writerows, [['a']]) + fileobj = StringIO() + writer = csv.writer(fileobj) + self.assertRaises(TypeError, writer.writerows, None) + writer.writerows([['a','b'],['c','d']]) + self.assertEqual(fileobj.getvalue(), "a,b\r\nc,d\r\n") + + def _read_test(self, input, expect, **kwargs): + reader = csv.reader(input, **kwargs) + result = list(reader) + self.assertEqual(result, expect) + + def test_read_oddinputs(self): + self._read_test([], []) + 
self._read_test([''], [[]]) + self.assertRaises(csv.Error, self._read_test, + ['"ab"c'], None, strict = 1) + # cannot handle null bytes for the moment + self.assertRaises(csv.Error, self._read_test, + ['ab\0c'], None, strict = 1) + self._read_test(['"ab"c'], [['abc']], doublequote = 0) + + def test_read_eol(self): + self._read_test(['a,b'], [['a','b']]) + self._read_test(['a,b\n'], [['a','b']]) + self._read_test(['a,b\r\n'], [['a','b']]) + self._read_test(['a,b\r'], [['a','b']]) + self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], []) + self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], []) + self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], []) + + def test_read_escape(self): + self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar='\\') + self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\') + self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\') + self._read_test(['a,"b,\\c"'], [['a', 'b,\\c']], escapechar='\\') + self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\') + self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\') + + def test_read_bigfield(self): + # This exercises the buffer realloc functionality + bigstring = 'X' * 50000 + bigline = '%s,%s' % (bigstring, bigstring) + self._read_test([bigline], [[bigstring, bigstring]]) + +class TestDialectRegistry(unittest.TestCase): + def test_registry_badargs(self): + self.assertRaises(TypeError, csv.list_dialects, None) + self.assertRaises(TypeError, csv.get_dialect) + self.assertRaises(csv.Error, csv.get_dialect, None) + self.assertRaises(csv.Error, csv.get_dialect, "nonesuch") + self.assertRaises(TypeError, csv.unregister_dialect) + self.assertRaises(csv.Error, csv.unregister_dialect, None) + self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch") + self.assertRaises(TypeError, csv.register_dialect, None) + self.assertRaises(TypeError, csv.register_dialect, None, None) + self.assertRaises(TypeError, csv.register_dialect, 
"nonesuch", None) + class bogus: + def __init__(self): + raise KeyError + self.assertRaises(KeyError, csv.register_dialect, "nonesuch", bogus) + + def test_registry(self): + class myexceltsv(csv.excel): + delimiter = "\t" + name = "myexceltsv" + expected_dialects = csv.list_dialects() + [name] + expected_dialects.sort() + csv.register_dialect(name, myexceltsv) + try: + self.failUnless(isinstance(csv.get_dialect(name), myexceltsv)) + got_dialects = csv.list_dialects() + got_dialects.sort() + self.assertEqual(expected_dialects, got_dialects) + finally: + csv.unregister_dialect(name) + + def test_incomplete_dialect(self): + class myexceltsv(csv.Dialect): + delimiter = "\t" + self.assertRaises(csv.Error, myexceltsv) + + def test_space_dialect(self): + class space(csv.excel): + delimiter = " " + quoting = csv.QUOTE_NONE + escapechar = "\\" + + s = StringIO("abc def\nc1ccccc1 benzene\n") + rdr = csv.reader(s, dialect=space()) + self.assertEqual(rdr.next(), ["abc", "def"]) + self.assertEqual(rdr.next(), ["c1ccccc1", "benzene"]) + + def test_dialect_apply(self): + class testA(csv.excel): + delimiter = "\t" + class testB(csv.excel): + delimiter = ":" + class testC(csv.excel): + delimiter = "|" + + csv.register_dialect('testC', testC) + try: + fileobj = StringIO() + writer = csv.writer(fileobj) + writer.writerow([1,2,3]) + self.assertEqual(fileobj.getvalue(), "1,2,3\r\n") + + fileobj = StringIO() + writer = csv.writer(fileobj, testA) + writer.writerow([1,2,3]) + self.assertEqual(fileobj.getvalue(), "1\t2\t3\r\n") + + fileobj = StringIO() + writer = csv.writer(fileobj, dialect=testB()) + writer.writerow([1,2,3]) + self.assertEqual(fileobj.getvalue(), "1:2:3\r\n") + + fileobj = StringIO() + writer = csv.writer(fileobj, dialect='testC') + writer.writerow([1,2,3]) + self.assertEqual(fileobj.getvalue(), "1|2|3\r\n") + + fileobj = StringIO() + writer = csv.writer(fileobj, dialect=testA, delimiter=';') + writer.writerow([1,2,3]) + self.assertEqual(fileobj.getvalue(), "1;2;3\r\n") + 
finally: + csv.unregister_dialect('testC') + + def test_bad_dialect(self): + # Unknown parameter + self.assertRaises(AttributeError, csv.reader, [], bad_attr = 0) + # Bad values + self.assertRaises(TypeError, csv.reader, [], delimiter = None) + self.assertRaises(TypeError, csv.reader, [], quoting = -1) + self.assertRaises(TypeError, csv.reader, [], quoting = 100) + +class TestCsvBase(unittest.TestCase): + def readerAssertEqual(self, input, expected_result): + reader = csv.reader(StringIO(input), dialect = self.dialect) + fields = list(reader) + self.assertEqual(fields, expected_result) + + def writerAssertEqual(self, input, expected_result): + fileobj = StringIO() + writer = csv.writer(fileobj, dialect = self.dialect) + writer.writerows(input) + self.assertEqual(fileobj.getvalue(), expected_result) + +class TestDialectExcel(TestCsvBase): + dialect = 'excel' + + def test_single(self): + self.readerAssertEqual('abc', [['abc']]) + + def test_simple(self): + self.readerAssertEqual('1,2,3,4,5', [['1','2','3','4','5']]) + + def test_blankline(self): + self.readerAssertEqual('', []) + + def test_empty_fields(self): + self.readerAssertEqual(',', [['', '']]) + + def test_singlequoted(self): + self.readerAssertEqual('""', [['']]) + + def test_singlequoted_left_empty(self): + self.readerAssertEqual('"",', [['','']]) + + def test_singlequoted_right_empty(self): + self.readerAssertEqual(',""', [['','']]) + + def test_single_quoted_quote(self): + self.readerAssertEqual('""""', [['"']]) + + def test_quoted_quotes(self): + self.readerAssertEqual('""""""', [['""']]) + + def test_inline_quote(self): + self.readerAssertEqual('a""b', [['a""b']]) + + def test_inline_quotes(self): + self.readerAssertEqual('a"b"c', [['a"b"c']]) + + def test_quotes_and_more(self): + self.readerAssertEqual('"a"b', [['ab']]) + + def test_lone_quote(self): + self.readerAssertEqual('a"b', [['a"b']]) + + def test_quote_and_quote(self): + self.readerAssertEqual('"a" "b"', [['a "b"']]) + + def 
test_space_and_quote(self): + self.readerAssertEqual(' "a"', [[' "a"']]) + + def test_quoted(self): + self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6', + [['1', '2', '3', + 'I think, therefore I am', + '5', '6']]) + + def test_quoted_quote(self): + self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"', + [['1', '2', '3', + '"I see," said the blind man', + 'as he picked up his hammer and saw']]) + + def test_quoted_nl(self): + input = '''\ +1,2,3,"""I see,"" +said the blind man","as he picked up his +hammer and saw" +9,8,7,6''' + self.readerAssertEqual(input, + [['1', '2', '3', + '"I see,"\nsaid the blind man', + 'as he picked up his\nhammer and saw'], + ['9','8','7','6']]) + + def test_dubious_quote(self): + self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']]) + + def test_null(self): + self.writerAssertEqual([], '') + + def test_single(self): + self.writerAssertEqual([['abc']], 'abc\r\n') + + def test_simple(self): + self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n') + + def test_quotes(self): + self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]], '1,2,"a""bc""",3,4\r\n') + + def test_quote_fieldsep(self): + self.writerAssertEqual([['abc,def']], '"abc,def"\r\n') + + def test_newlines(self): + self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]], '1,2,"a\nbc",3,4\r\n') + +class EscapedExcel(csv.excel): + quoting = csv.QUOTE_NONE + escapechar = '\\' + +class TestEscapedExcel(TestCsvBase): + dialect = EscapedExcel() + + def test_escape_fieldsep(self): + self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n') + + def test_read_escape_fieldsep(self): + self.readerAssertEqual('abc\\,def\r\n', [['abc,def']]) + +class QuotedEscapedExcel(csv.excel): + quoting = csv.QUOTE_NONNUMERIC + escapechar = '\\' + +class TestQuotedEscapedExcel(TestCsvBase): + dialect = QuotedEscapedExcel() + + def test_write_escape_fieldsep(self): + self.writerAssertEqual([['abc,def']], '"abc,def"\r\n') + + def 
test_read_escape_fieldsep(self): + self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']]) + +# Disabled, pending support in csv.utils module +class TestDictFields(unittest.TestCase): + ### "long" means the row is longer than the number of fieldnames + ### "short" means there are fewer elements in the row than fieldnames + def test_write_simple_dict(self): + fileobj = StringIO() + writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"]) + writer.writerow({"f1": 10, "f3": "abc"}) + self.assertEqual(fileobj.getvalue(), "10,,abc\r\n") + + def test_write_no_fields(self): + fileobj = StringIO() + self.assertRaises(TypeError, csv.DictWriter, fileobj) + + def test_read_dict_fields(self): + reader = csv.DictReader(StringIO("1,2,abc\r\n"), + fieldnames=["f1", "f2", "f3"]) + self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'}) + + def test_read_long(self): + reader = csv.DictReader(StringIO("1,2,abc,4,5,6\r\n"), + fieldnames=["f1", "f2"]) + self.assertEqual(reader.next(), {"f1": '1', "f2": '2', + None: ["abc", "4", "5", "6"]}) + + def test_read_long_with_rest(self): + reader = csv.DictReader(StringIO("1,2,abc,4,5,6\r\n"), + fieldnames=["f1", "f2"], restkey="_rest") + self.assertEqual(reader.next(), {"f1": '1', "f2": '2', + "_rest": ["abc", "4", "5", "6"]}) + + def test_read_short(self): + reader = csv.DictReader(["1,2,abc,4,5,6\r\n","1,2,abc\r\n"], + fieldnames="1 2 3 4 5 6".split(), + restval="DEFAULT") + self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc', + "4": '4', "5": '5', "6": '6'}) + self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc', + "4": 'DEFAULT', "5": 'DEFAULT', + "6": 'DEFAULT'}) + + def test_read_with_blanks(self): + reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n", + "1,2,abc,4,5,6\r\n"], + fieldnames="1 2 3 4 5 6".split()) + self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc', + "4": '4', "5": '5', "6": '6'}) + self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc', + "4": '4', "5": 
'5', "6": '6'}) + +class TestArrayWrites(unittest.TestCase): + def test_int_write(self): + import array + contents = [(20-i) for i in range(20)] + a = array.array('i', contents) + fileobj = StringIO() + writer = csv.writer(fileobj, dialect="excel") + writer.writerow(a) + expected = ",".join([str(i) for i in a])+"\r\n" + self.assertEqual(fileobj.getvalue(), expected) + + def test_double_write(self): + import array + contents = [(20-i)*0.1 for i in range(20)] + a = array.array('d', contents) + fileobj = StringIO() + writer = csv.writer(fileobj, dialect="excel") + writer.writerow(a) + expected = ",".join([str(i) for i in a])+"\r\n" + self.assertEqual(fileobj.getvalue(), expected) + + def test_float_write(self): + import array + contents = [(20-i)*0.1 for i in range(20)] + a = array.array('f', contents) + fileobj = StringIO() + writer = csv.writer(fileobj, dialect="excel") + writer.writerow(a) + expected = ",".join([str(i) for i in a])+"\r\n" + self.assertEqual(fileobj.getvalue(), expected) + + def test_char_write(self): + import array, string + a = array.array('c', string.letters) + fileobj = StringIO() + writer = csv.writer(fileobj, dialect="excel") + writer.writerow(a) + expected = ",".join(a)+"\r\n" + self.assertEqual(fileobj.getvalue(), expected) + +class TestDialectValidity(unittest.TestCase): + def test_quoting(self): + class mydialect(csv.Dialect): + delimiter = ";" + escapechar = '\\' + doublequote = False + skipinitialspace = True + lineterminator = '\r\n' + quoting = csv.QUOTE_NONE + d = mydialect() + + mydialect.quoting = None + self.assertRaises(csv.Error, mydialect) + + mydialect.quoting = csv.QUOTE_NONE + mydialect.escapechar = None + self.assertRaises(csv.Error, mydialect) + + mydialect.doublequote = True + mydialect.quoting = csv.QUOTE_ALL + mydialect.quotechar = '"' + d = mydialect() + + mydialect.quotechar = "''" + self.assertRaises(csv.Error, mydialect) + + mydialect.quotechar = 4 + self.assertRaises(csv.Error, mydialect) + + def 
test_delimiter(self): + class mydialect(csv.Dialect): + delimiter = ";" + escapechar = '\\' + doublequote = False + skipinitialspace = True + lineterminator = '\r\n' + quoting = csv.QUOTE_NONE + d = mydialect() + + mydialect.delimiter = ":::" + self.assertRaises(csv.Error, mydialect) + + mydialect.delimiter = 4 + self.assertRaises(csv.Error, mydialect) + + def test_lineterminator(self): + class mydialect(csv.Dialect): + delimiter = ";" + escapechar = '\\' + doublequote = False + skipinitialspace = True + lineterminator = '\r\n' + quoting = csv.QUOTE_NONE + d = mydialect() + + mydialect.lineterminator = ":::" + d = mydialect() + + mydialect.lineterminator = 4 + self.assertRaises(csv.Error, mydialect) + + +if not hasattr(sys, "gettotalrefcount"): + print "*** skipping leakage tests ***" +else: + class NUL: + def write(s, *args): + pass + writelines = write + + class TestLeaks(unittest.TestCase): + def test_create_read(self): + delta = 0 + lastrc = sys.gettotalrefcount() + for i in xrange(20): + gc.collect() + self.assertEqual(gc.garbage, []) + rc = sys.gettotalrefcount() + csv.reader(["a,b,c\r\n"]) + csv.reader(["a,b,c\r\n"]) + csv.reader(["a,b,c\r\n"]) + delta = rc-lastrc + lastrc = rc + # if csv.reader() leaks, last delta should be 3 or more + self.assertEqual(delta < 3, True) + + def test_create_write(self): + delta = 0 + lastrc = sys.gettotalrefcount() + s = NUL() + for i in xrange(20): + gc.collect() + self.assertEqual(gc.garbage, []) + rc = sys.gettotalrefcount() + csv.writer(s) + csv.writer(s) + csv.writer(s) + delta = rc-lastrc + lastrc = rc + # if csv.writer() leaks, last delta should be 3 or more + self.assertEqual(delta < 3, True) + + def test_read(self): + delta = 0 + rows = ["a,b,c\r\n"]*5 + lastrc = sys.gettotalrefcount() + for i in xrange(20): + gc.collect() + self.assertEqual(gc.garbage, []) + rc = sys.gettotalrefcount() + rdr = csv.reader(rows) + for row in rdr: + pass + delta = rc-lastrc + lastrc = rc + # if reader leaks during read, delta should be 
5 or more + self.assertEqual(delta < 5, True) + + def test_write(self): + delta = 0 + rows = [[1,2,3]]*5 + s = NUL() + lastrc = sys.gettotalrefcount() + for i in xrange(20): + gc.collect() + self.assertEqual(gc.garbage, []) + rc = sys.gettotalrefcount() + writer = csv.writer(s) + for row in rows: + writer.writerow(row) + delta = rc-lastrc + lastrc = rc + # if writer leaks during write, last delta should be 5 or more + self.assertEqual(delta < 5, True) + +def _testclasses(): + mod = sys.modules[__name__] + return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] + +def suite(): + suite = unittest.TestSuite() + for testclass in _testclasses(): + suite.addTest(unittest.makeSuite(testclass)) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/Modules/_csv.c b/Modules/_csv.c new file mode 100644 index 00000000000..701904a06e7 --- /dev/null +++ b/Modules/_csv.c @@ -0,0 +1,1465 @@ +/* TODO: +*/ + +#include "Python.h" +#include "structmember.h" + +/* begin 2.2 compatibility macros */ +#ifndef PyDoc_STRVAR +/* Define macros for inline documentation. 
/* States of the reader's per-character state machine; the transitions
 * are implemented in parse_process_char().
 */
typedef enum {
        START_RECORD,           /* at the start of a record; a bare '\n'
                                   here yields an empty record */
        START_FIELD,            /* expecting the first character of a
                                   field */
        ESCAPED_CHAR,           /* escapechar seen outside quotes; the
                                   next character is added and parsing
                                   continues in IN_FIELD */
        IN_FIELD,               /* inside an unquoted field */
        IN_QUOTED_FIELD,        /* inside a quoted field; '\n' is kept
                                   as field data */
        ESCAPE_IN_QUOTED_FIELD, /* escapechar seen inside a quoted
                                   field */
        QUOTE_IN_QUOTED_FIELD   /* quotechar seen inside a quoted field
                                   while doublequote is enabled */
} ParserState;
*/ + PyObject *lineterminator; /* string to write between records */ + QuoteStyle quoting; /* style of quoting to write */ + + int strict; /* raise exception on bad CSV */ +} DialectObj; + +staticforward PyTypeObject Dialect_Type; + +typedef struct { + PyObject_HEAD + + PyObject *input_iter; /* iterate over this for input lines */ + + DialectObj *dialect; /* parsing dialect */ + + PyObject *fields; /* field list for current record */ + ParserState state; /* current CSV parse state */ + char *field; /* build current field in here */ + int field_size; /* size of allocated buffer */ + int field_len; /* length of current field */ + int had_parse_error; /* did we have a parse error? */ +} ReaderObj; + +staticforward PyTypeObject Reader_Type; + +#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type) + +typedef struct { + PyObject_HEAD + + PyObject *writeline; /* write output lines to this file */ + + DialectObj *dialect; /* parsing dialect */ + + char *rec; /* buffer for parser.join */ + int rec_size; /* size of allocated record */ + int rec_len; /* length of record */ + int num_fields; /* number of fields in record */ +} WriterObj; + +staticforward PyTypeObject Writer_Type; + +/* + * DIALECT class + */ + +static PyObject * +get_dialect_from_registry(PyObject * name_obj) +{ + PyObject *dialect_obj; + + dialect_obj = PyDict_GetItem(dialects, name_obj); + if (dialect_obj == NULL) + return PyErr_Format(error_obj, "unknown dialect"); + Py_INCREF(dialect_obj); + return dialect_obj; +} + +static int +check_delattr(PyObject *v) +{ + if (v == NULL) { + PyErr_SetString(PyExc_TypeError, + "Cannot delete attribute"); + return -1; + } + return 0; +} + +static PyObject * +get_string(PyObject *str) +{ + Py_XINCREF(str); + return str; +} + +static int +set_string(PyObject **str, PyObject *v) +{ + if (check_delattr(v) < 0) + return -1; + if (!PyString_Check(v) && !PyUnicode_Check(v)) { + PyErr_BadArgument(); + return -1; + } + Py_XDECREF(*str); + Py_INCREF(v); + *str = v; + return 
0; +} + +static PyObject * +get_nullchar_as_None(char c) +{ + if (c == '\0') { + Py_INCREF(Py_None); + return Py_None; + } + else + return PyString_FromStringAndSize((char*)&c, 1); +} + +static int +set_None_as_nullchar(char * addr, PyObject *v) +{ + if (check_delattr(v) < 0) + return -1; + if (v == Py_None) + *addr = '\0'; + else if (!PyString_Check(v) || PyString_Size(v) != 1) { + PyErr_BadArgument(); + return -1; + } + else + *addr = PyString_AsString(v)[0]; + return 0; +} + +static PyObject * +Dialect_get_lineterminator(DialectObj *self) +{ + return get_string(self->lineterminator); +} + +static int +Dialect_set_lineterminator(DialectObj *self, PyObject *value) +{ + return set_string(&self->lineterminator, value); +} + +static PyObject * +Dialect_get_escapechar(DialectObj *self) +{ + return get_nullchar_as_None(self->escapechar); +} + +static int +Dialect_set_escapechar(DialectObj *self, PyObject *value) +{ + return set_None_as_nullchar(&self->escapechar, value); +} + +static PyObject * +Dialect_get_quoting(DialectObj *self) +{ + return PyInt_FromLong(self->quoting); +} + +static int +Dialect_set_quoting(DialectObj *self, PyObject *v) +{ + int quoting; + StyleDesc *qs = quote_styles; + + if (check_delattr(v) < 0) + return -1; + if (!PyInt_Check(v)) { + PyErr_BadArgument(); + return -1; + } + quoting = PyInt_AsLong(v); + for (qs = quote_styles; qs->name; qs++) { + if (qs->style == quoting) { + self->quoting = quoting; + return 0; + } + } + PyErr_BadArgument(); + return -1; +} + +static struct PyMethodDef Dialect_methods[] = { + { NULL, NULL } +}; + +#define D_OFF(x) offsetof(DialectObj, x) + +static struct PyMemberDef Dialect_memberlist[] = { + { "quotechar", T_CHAR, D_OFF(quotechar) }, + { "delimiter", T_CHAR, D_OFF(delimiter) }, + { "skipinitialspace", T_INT, D_OFF(skipinitialspace) }, + { "doublequote", T_INT, D_OFF(doublequote) }, + { "strict", T_INT, D_OFF(strict) }, + { NULL } +}; + +static PyGetSetDef Dialect_getsetlist[] = { + { "escapechar", 
(getter)Dialect_get_escapechar, + (setter)Dialect_set_escapechar }, + { "lineterminator", (getter)Dialect_get_lineterminator, + (setter)Dialect_set_lineterminator }, + { "quoting", (getter)Dialect_get_quoting, + (setter)Dialect_set_quoting }, + {NULL}, +}; + +static void +Dialect_dealloc(DialectObj *self) +{ + Py_XDECREF(self->lineterminator); + /*PyMem_DEL(self);*/ + self->ob_type->tp_free((PyObject *)self); +} + +static int +dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs) +{ + PyObject *dialect = NULL, *name_obj, *value_obj; + + self->quotechar = '"'; + self->delimiter = ','; + self->escapechar = '\0'; + self->skipinitialspace = 0; + Py_XDECREF(self->lineterminator); + self->lineterminator = PyString_FromString("\r\n"); + if (self->lineterminator == NULL) + return -1; + self->quoting = QUOTE_MINIMAL; + self->doublequote = 1; + self->strict = 0; + + if (!PyArg_ParseTuple(args, "|O", &dialect)) + return -1; + Py_XINCREF(dialect); + if (kwargs != NULL) { + PyObject * key = PyString_FromString("dialect"); + PyObject * d; + + d = PyDict_GetItem(kwargs, key); + if (d) { + Py_INCREF(d); + Py_XDECREF(dialect); + PyDict_DelItem(kwargs, key); + dialect = d; + } + Py_DECREF(key); + } + if (dialect != NULL) { + int i; + PyObject * dir_list; + + /* If dialect is a string, look it up in our registry */ + if (PyString_Check(dialect) || PyUnicode_Check(dialect)) { + PyObject * new_dia; + new_dia = get_dialect_from_registry(dialect); + Py_DECREF(dialect); + if (new_dia == NULL) + return -1; + dialect = new_dia; + } + /* A class rather than an instance? 
Instanciate */ + if (PyObject_TypeCheck(dialect, &PyClass_Type)) { + PyObject * new_dia; + new_dia = PyObject_CallFunction(dialect, ""); + Py_DECREF(dialect); + if (new_dia == NULL) + return -1; + dialect = new_dia; + } + /* Make sure we finally have an instance */ + if (!PyInstance_Check(dialect) || + (dir_list = PyObject_Dir(dialect)) == NULL) { + PyErr_SetString(PyExc_TypeError, + "dialect must be an instance"); + Py_DECREF(dialect); + return -1; + } + /* And extract the attributes */ + for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) { + name_obj = PyList_GET_ITEM(dir_list, i); + if (PyString_AsString(name_obj)[0] == '_') + continue; + value_obj = PyObject_GetAttr(dialect, name_obj); + if (value_obj) { + if (PyObject_SetAttr((PyObject *)self, + name_obj, value_obj)) { + Py_DECREF(dir_list); + return -1; + } + Py_DECREF(value_obj); + } + } + Py_DECREF(dir_list); + Py_DECREF(dialect); + } + if (kwargs != NULL) { + int pos = 0; + + while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) { + if (PyObject_SetAttr((PyObject *)self, + name_obj, value_obj)) + return -1; + } + } + return 0; +} + +static PyObject * +dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + DialectObj *self; + self = (DialectObj *)type->tp_alloc(type, 0); + if (self != NULL) { + self->lineterminator = NULL; + } + return (PyObject *)self; +} + + +PyDoc_STRVAR(Dialect_Type_doc, +"CSV dialect\n" +"\n" +"The Dialect type records CSV parsing and generation options.\n"); + +static PyTypeObject Dialect_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "_csv.Dialect", /* tp_name */ + sizeof(DialectObj), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)Dialect_dealloc, /* tp_dealloc */ + (printfunc)0, /* tp_print */ + (getattrfunc)0, /* tp_getattr */ + (setattrfunc)0, /* tp_setattr */ + (cmpfunc)0, /* tp_compare */ + (reprfunc)0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + 
(ternaryfunc)0, /* tp_call */ + (reprfunc)0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + Dialect_Type_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Dialect_methods, /* tp_methods */ + Dialect_memberlist, /* tp_members */ + Dialect_getsetlist, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)dialect_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + dialect_new, /* tp_new */ + 0, /* tp_free */ +}; + +static void +parse_save_field(ReaderObj *self) +{ + PyObject *field; + + field = PyString_FromStringAndSize(self->field, self->field_len); + if (field != NULL) { + PyList_Append(self->fields, field); + Py_XDECREF(field); + } + self->field_len = 0; +} + +static int +parse_grow_buff(ReaderObj *self) +{ + if (self->field_size == 0) { + self->field_size = 4096; + self->field = PyMem_Malloc(self->field_size); + } + else { + self->field_size *= 2; + self->field = PyMem_Realloc(self->field, self->field_size); + } + if (self->field == NULL) { + PyErr_NoMemory(); + return 0; + } + return 1; +} + +static void +parse_add_char(ReaderObj *self, char c) +{ + if (self->field_len == self->field_size && !parse_grow_buff(self)) + return; + self->field[self->field_len++] = c; +} + +static void +parse_process_char(ReaderObj *self, char c) +{ + DialectObj *dialect = self->dialect; + + switch (self->state) { + case START_RECORD: + /* start of record */ + if (c == '\n') + /* empty line - return [] */ + break; + /* normal character - handle as START_FIELD */ + self->state = START_FIELD; + /* fallthru */ + case START_FIELD: + /* expecting field */ + if (c == '\n') { + /* save empty field - return [fields] */ + parse_save_field(self); + self->state = START_RECORD; + } + else if (c == 
dialect->quotechar) { + /* start quoted field */ + self->state = IN_QUOTED_FIELD; + } + else if (c == dialect->escapechar) { + /* possible escaped character */ + self->state = ESCAPED_CHAR; + } + else if (c == ' ' && dialect->skipinitialspace) + /* ignore space at start of field */ + ; + else if (c == dialect->delimiter) { + /* save empty field */ + parse_save_field(self); + } + else { + /* begin new unquoted field */ + parse_add_char(self, c); + self->state = IN_FIELD; + } + break; + + case ESCAPED_CHAR: + if (c != dialect->escapechar && + c != dialect->delimiter && + c != dialect->quotechar) + parse_add_char(self, dialect->escapechar); + parse_add_char(self, c); + self->state = IN_FIELD; + break; + + case IN_FIELD: + /* in unquoted field */ + if (c == '\n') { + /* end of line - return [fields] */ + parse_save_field(self); + self->state = START_RECORD; + } + else if (c == dialect->escapechar) { + /* possible escaped character */ + self->state = ESCAPED_CHAR; + } + else if (c == dialect->delimiter) { + /* save field - wait for new field */ + parse_save_field(self); + self->state = START_FIELD; + } + else { + /* normal character - save in field */ + parse_add_char(self, c); + } + break; + + case IN_QUOTED_FIELD: + /* in quoted field */ + if (c == '\n') { + /* end of line - save '\n' in field */ + parse_add_char(self, '\n'); + } + else if (c == dialect->escapechar) { + /* Possible escape character */ + self->state = ESCAPE_IN_QUOTED_FIELD; + } + else if (c == dialect->quotechar) { + if (dialect->doublequote) { + /* doublequote; " represented by "" */ + self->state = QUOTE_IN_QUOTED_FIELD; + } + else { + /* end of quote part of field */ + self->state = IN_FIELD; + } + } + else { + /* normal character - save in field */ + parse_add_char(self, c); + } + break; + + case ESCAPE_IN_QUOTED_FIELD: + if (c != dialect->escapechar && + c != dialect->delimiter && + c != dialect->quotechar) + parse_add_char(self, dialect->escapechar); + parse_add_char(self, c); + self->state = 
IN_QUOTED_FIELD; + break; + + case QUOTE_IN_QUOTED_FIELD: + /* doublequote - seen a quote in an quoted field */ + if (dialect->quoting != QUOTE_NONE && + c == dialect->quotechar) { + /* save "" as " */ + parse_add_char(self, c); + self->state = IN_QUOTED_FIELD; + } + else if (c == dialect->delimiter) { + /* save field - wait for new field */ + parse_save_field(self); + self->state = START_FIELD; + } + else if (c == '\n') { + /* end of line - return [fields] */ + parse_save_field(self); + self->state = START_RECORD; + } + else if (!dialect->strict) { + parse_add_char(self, c); + self->state = IN_FIELD; + } + else { + /* illegal */ + self->had_parse_error = 1; + PyErr_Format(error_obj, "%c expected after %c", + dialect->delimiter, + dialect->quotechar); + } + break; + + } +} + +/* + * READER + */ +#define R_OFF(x) offsetof(ReaderObj, x) + +static struct PyMemberDef Reader_memberlist[] = { + { "dialect", T_OBJECT, R_OFF(dialect), RO }, + { NULL } +}; + +static PyObject * +Reader_getiter(ReaderObj *self) +{ + Py_INCREF(self); + return (PyObject *)self; +} + +static PyObject * +Reader_iternext(ReaderObj *self) +{ + PyObject *lineobj; + PyObject *fields; + char *line; + + do { + lineobj = PyIter_Next(self->input_iter); + if (lineobj == NULL) { + /* End of input OR exception */ + if (!PyErr_Occurred() && self->field_len != 0) + return PyErr_Format(error_obj, + "newline inside string"); + return NULL; + } + + if (self->had_parse_error) { + if (self->fields) { + Py_XDECREF(self->fields); + } + self->fields = PyList_New(0); + self->field_len = 0; + self->state = START_RECORD; + self->had_parse_error = 0; + } + line = PyString_AsString(lineobj); + + if (line == NULL) { + Py_DECREF(lineobj); + return NULL; + } + if (strlen(line) < PyString_GET_SIZE(lineobj)) { + self->had_parse_error = 1; + Py_DECREF(lineobj); + return PyErr_Format(error_obj, + "string with NUL bytes"); + } + + /* Process line of text - send '\n' to processing code to + represent end of line. 
End of line which is not at end of + string is an error. */ + while (*line) { + char c; + + c = *line++; + if (c == '\r') { + c = *line++; + if (c == '\0') + /* macintosh end of line */ + break; + if (c == '\n') { + c = *line++; + if (c == '\0') + /* DOS end of line */ + break; + } + self->had_parse_error = 1; + Py_DECREF(lineobj); + return PyErr_Format(error_obj, + "newline inside string"); + } + if (c == '\n') { + c = *line++; + if (c == '\0') + /* unix end of line */ + break; + self->had_parse_error = 1; + Py_DECREF(lineobj); + return PyErr_Format(error_obj, + "newline inside string"); + } + parse_process_char(self, c); + if (PyErr_Occurred()) { + Py_DECREF(lineobj); + return NULL; + } + } + parse_process_char(self, '\n'); + Py_DECREF(lineobj); + } while (self->state != START_RECORD); + + fields = self->fields; + self->fields = PyList_New(0); + return fields; +} + +static void +Reader_dealloc(ReaderObj *self) +{ + Py_XDECREF(self->dialect); + Py_XDECREF(self->input_iter); + Py_XDECREF(self->fields); + PyMem_DEL(self); +} + +PyDoc_STRVAR(Reader_Type_doc, +"CSV reader\n" +"\n" +"Reader objects are responsible for reading and parsing tabular data\n" +"in CSV format.\n" +); + +static struct PyMethodDef Reader_methods[] = { + { NULL, NULL } +}; + +static PyTypeObject Reader_Type = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "_csv.reader", /*tp_name*/ + sizeof(ReaderObj), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + /* methods */ + (destructor)Reader_dealloc, /*tp_dealloc*/ + (printfunc)0, /*tp_print*/ + (getattrfunc)0, /*tp_getattr*/ + (setattrfunc)0, /*tp_setattr*/ + (cmpfunc)0, /*tp_compare*/ + (reprfunc)0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + (hashfunc)0, /*tp_hash*/ + (ternaryfunc)0, /*tp_call*/ + (reprfunc)0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + Reader_Type_doc, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, 
/*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + (getiterfunc)Reader_getiter, /*tp_iter*/ + (getiterfunc)Reader_iternext, /*tp_iternext*/ + Reader_methods, /*tp_methods*/ + Reader_memberlist, /*tp_members*/ + 0, /*tp_getset*/ + +}; + +static PyObject * +csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) +{ + PyObject * iterator, * dialect = NULL, *ctor_args; + ReaderObj * self = PyObject_NEW(ReaderObj, &Reader_Type); + + if (!self) + return NULL; + + self->dialect = NULL; + self->input_iter = self->fields = NULL; + + self->fields = NULL; + self->input_iter = NULL; + self->had_parse_error = 0; + self->field = NULL; + self->field_size = 0; + self->field_len = 0; + self->state = START_RECORD; + + if (!PyArg_ParseTuple(args, "O|O", &iterator, &dialect)) { + Py_DECREF(self); + return NULL; + } + self->input_iter = PyObject_GetIter(iterator); + if (self->input_iter == NULL) { + PyErr_SetString(PyExc_TypeError, + "argument 1 must be an iterator"); + Py_DECREF(self); + return NULL; + } + ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect); + if (ctor_args == NULL) { + Py_DECREF(self); + return NULL; + } + self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type, + ctor_args, keyword_args); + Py_DECREF(ctor_args); + if (self->dialect == NULL) { + Py_DECREF(self); + return NULL; + } + self->fields = PyList_New(0); + if (self->fields == NULL) { + Py_DECREF(self); + return NULL; + } + + return (PyObject *)self; +} + +/* + * WRITER + */ +/* ---------------------------------------------------------------- */ +static void +join_reset(WriterObj *self) +{ + self->rec_len = 0; + self->num_fields = 0; +} + +#define MEM_INCR 32768 + +/* Calculate new record length or append field to record. Return new + * record length. 
+ */ +static int +join_append_data(WriterObj *self, char *field, int quote_empty, + int *quoted, int copy_phase) +{ + DialectObj *dialect = self->dialect; + int i, rec_len; + + rec_len = self->rec_len; + + /* If this is not the first field we need a field separator. + */ + if (self->num_fields > 0) { + if (copy_phase) + self->rec[rec_len] = dialect->delimiter; + rec_len++; + } + /* Handle preceding quote. + */ + switch (dialect->quoting) { + case QUOTE_ALL: + *quoted = 1; + if (copy_phase) + self->rec[rec_len] = dialect->quotechar; + rec_len++; + break; + case QUOTE_MINIMAL: + case QUOTE_NONNUMERIC: + /* We only know about quoted in the copy phase. + */ + if (copy_phase && *quoted) { + self->rec[rec_len] = dialect->quotechar; + rec_len++; + } + break; + case QUOTE_NONE: + break; + } + /* Copy/count field data. + */ + for (i = 0;; i++) { + char c = field[i]; + + if (c == '\0') + break; + /* If in doublequote mode we escape quote chars with a + * quote. + */ + if (dialect->quoting != QUOTE_NONE && + c == dialect->quotechar && dialect->doublequote) { + if (copy_phase) + self->rec[rec_len] = dialect->quotechar; + *quoted = 1; + rec_len++; + } + + /* Some special characters need to be escaped. If we have a + * quote character switch to quoted field instead of escaping + * individual characters. + */ + if (!*quoted + && (c == dialect->delimiter || + c == dialect->escapechar || + c == '\n' || c == '\r')) { + if (dialect->quoting != QUOTE_NONE) + *quoted = 1; + else if (dialect->escapechar) { + if (copy_phase) + self->rec[rec_len] = dialect->escapechar; + rec_len++; + } + else { + PyErr_Format(error_obj, + "delimiter must be quoted or escaped"); + return -1; + } + } + /* Copy field character into record buffer. + */ + if (copy_phase) + self->rec[rec_len] = c; + rec_len++; + } + + /* If field is empty check if it needs to be quoted. 
+ */ + if (i == 0 && quote_empty) { + if (dialect->quoting == QUOTE_NONE) { + PyErr_Format(error_obj, + "single empty field record must be quoted"); + return -1; + } else + *quoted = 1; + } + + /* Handle final quote character on field. + */ + if (*quoted) { + if (copy_phase) + self->rec[rec_len] = dialect->quotechar; + else + /* Didn't know about leading quote until we found it + * necessary in field data - compensate for it now. + */ + rec_len++; + rec_len++; + } + + return rec_len; +} + +static int +join_check_rec_size(WriterObj *self, int rec_len) +{ + if (rec_len > self->rec_size) { + if (self->rec_size == 0) { + self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; + self->rec = PyMem_Malloc(self->rec_size); + } + else { + char *old_rec = self->rec; + + self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; + self->rec = PyMem_Realloc(self->rec, self->rec_size); + if (self->rec == NULL) + PyMem_Free(old_rec); + } + if (self->rec == NULL) { + PyErr_NoMemory(); + return 0; + } + } + return 1; +} + +static int +join_append(WriterObj *self, char *field, int *quoted, int quote_empty) +{ + int rec_len; + + rec_len = join_append_data(self, field, quote_empty, quoted, 0); + if (rec_len < 0) + return 0; + + /* grow record buffer if necessary */ + if (!join_check_rec_size(self, rec_len)) + return 0; + + self->rec_len = join_append_data(self, field, quote_empty, quoted, 1); + self->num_fields++; + + return 1; +} + +static int +join_append_lineterminator(WriterObj *self) +{ + int terminator_len; + + terminator_len = PyString_Size(self->dialect->lineterminator); + + /* grow record buffer if necessary */ + if (!join_check_rec_size(self, self->rec_len + terminator_len)) + return 0; + + memmove(self->rec + self->rec_len, + PyString_AsString(self->dialect->lineterminator), + terminator_len); + self->rec_len += terminator_len; + + return 1; +} + +PyDoc_STRVAR(csv_writerow_doc, +"join(sequence) -> string\n" +"\n" +"Construct a CSV record from a sequence of fields. 
Non-string\n" +"elements will be converted to string."); + +static PyObject * +csv_writerow(WriterObj *self, PyObject *seq) +{ + DialectObj *dialect = self->dialect; + int len, i; + + if (!PySequence_Check(seq)) + return PyErr_Format(error_obj, "sequence expected"); + + len = PySequence_Length(seq); + if (len < 0) + return NULL; + + /* Join all fields in internal buffer. + */ + join_reset(self); + for (i = 0; i < len; i++) { + PyObject *field; + int append_ok; + int quoted; + + field = PySequence_GetItem(seq, i); + if (field == NULL) + return NULL; + + quoted = 0; + if (dialect->quoting == QUOTE_NONNUMERIC) { + PyObject *num; + + num = PyNumber_Float(field); + if (num == NULL) { + quoted = 1; + PyErr_Clear(); + } + else { + Py_DECREF(num); + } + } + + if (PyString_Check(field)) { + append_ok = join_append(self, PyString_AsString(field), + "ed, len == 1); + Py_DECREF(field); + } + else if (field == Py_None) { + append_ok = join_append(self, "", "ed, len == 1); + Py_DECREF(field); + } + else { + PyObject *str; + + str = PyObject_Str(field); + Py_DECREF(field); + if (str == NULL) + return NULL; + + append_ok = join_append(self, PyString_AsString(str), + "ed, len == 1); + Py_DECREF(str); + } + if (!append_ok) + return NULL; + } + + /* Add line terminator. 
+ */ + if (!join_append_lineterminator(self)) + return 0; + + return PyObject_CallFunction(self->writeline, + "(s#)", self->rec, self->rec_len); +} + +static PyObject * +csv_writerows(WriterObj *self, PyObject *seqseq) +{ + PyObject *row_iter, *row_obj, *result; + + row_iter = PyObject_GetIter(seqseq); + if (row_iter == NULL) { + PyErr_SetString(PyExc_TypeError, + "writerows() argument must be iteratable"); + return NULL; + } + while ((row_obj = PyIter_Next(row_iter))) { + result = csv_writerow(self, row_obj); + Py_DECREF(row_obj); + if (!result) { + Py_DECREF(row_iter); + return NULL; + } + else + Py_DECREF(result); + } + Py_DECREF(row_iter); + if (PyErr_Occurred()) + return NULL; + Py_INCREF(Py_None); + return Py_None; +} + +static struct PyMethodDef Writer_methods[] = { + { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, + { "writerows", (PyCFunction)csv_writerows, METH_O}, + { NULL, NULL } +}; + +#define W_OFF(x) offsetof(WriterObj, x) + +static struct PyMemberDef Writer_memberlist[] = { + { "dialect", T_OBJECT, W_OFF(dialect), RO }, + { NULL } +}; + +static void +Writer_dealloc(WriterObj *self) +{ + Py_XDECREF(self->dialect); + Py_XDECREF(self->writeline); + PyMem_DEL(self); +} + +PyDoc_STRVAR(Writer_Type_doc, +"CSV writer\n" +"\n" +"Writer objects are responsible for generating tabular data\n" +"in CSV format from sequence input.\n" +); + +static PyTypeObject Writer_Type = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "_csv.writer", /*tp_name*/ + sizeof(WriterObj), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + /* methods */ + (destructor)Writer_dealloc, /*tp_dealloc*/ + (printfunc)0, /*tp_print*/ + (getattrfunc)0, /*tp_getattr*/ + (setattrfunc)0, /*tp_setattr*/ + (cmpfunc)0, /*tp_compare*/ + (reprfunc)0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + (hashfunc)0, /*tp_hash*/ + (ternaryfunc)0, /*tp_call*/ + (reprfunc)0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + 
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + Writer_Type_doc, + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + (getiterfunc)0, /*tp_iter*/ + (getiterfunc)0, /*tp_iternext*/ + Writer_methods, /*tp_methods*/ + Writer_memberlist, /*tp_members*/ + 0, /*tp_getset*/ +}; + +static PyObject * +csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) +{ + PyObject * output_file, * dialect = NULL, *ctor_args; + WriterObj * self = PyObject_NEW(WriterObj, &Writer_Type); + + if (!self) + return NULL; + + self->dialect = NULL; + self->writeline = NULL; + + self->rec = NULL; + self->rec_size = 0; + self->rec_len = 0; + self->num_fields = 0; + + if (!PyArg_ParseTuple(args, "O|O", &output_file, &dialect)) { + Py_DECREF(self); + return NULL; + } + self->writeline = PyObject_GetAttrString(output_file, "write"); + if (self->writeline == NULL || !PyCallable_Check(self->writeline)) { + PyErr_SetString(PyExc_TypeError, + "argument 1 must be an instance with a write method"); + Py_DECREF(self); + return NULL; + } + ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect); + if (ctor_args == NULL) { + Py_DECREF(self); + return NULL; + } + self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type, + ctor_args, keyword_args); + Py_DECREF(ctor_args); + if (self->dialect == NULL) { + Py_DECREF(self); + return NULL; + } + return (PyObject *)self; +} + +/* + * DIALECT REGISTRY + */ +static PyObject * +csv_list_dialects(PyObject *module, PyObject *args) +{ + return PyDict_Keys(dialects); +} + +static PyObject * +csv_register_dialect(PyObject *module, PyObject *args) +{ + PyObject *name_obj, *dialect_obj; + + if (!PyArg_ParseTuple(args, "OO", &name_obj, &dialect_obj)) + return NULL; + if (!PyString_Check(name_obj) && !PyUnicode_Check(name_obj)) { + PyErr_SetString(PyExc_TypeError, + "dialect name must be a string or unicode"); + return NULL; + } + Py_INCREF(dialect_obj); + /* A class rather than an instance? 
Instanciate */ + if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) { + PyObject * new_dia; + new_dia = PyObject_CallFunction(dialect_obj, ""); + Py_DECREF(dialect_obj); + if (new_dia == NULL) + return NULL; + dialect_obj = new_dia; + } + /* Make sure we finally have an instance */ + if (!PyInstance_Check(dialect_obj)) { + PyErr_SetString(PyExc_TypeError, "dialect must be an instance"); + Py_DECREF(dialect_obj); + return NULL; + } + if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) { + Py_DECREF(dialect_obj); + return NULL; + } + if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) { + Py_DECREF(dialect_obj); + return NULL; + } + Py_DECREF(dialect_obj); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +csv_unregister_dialect(PyObject *module, PyObject *args) +{ + PyObject *name_obj; + + if (!PyArg_ParseTuple(args, "O", &name_obj)) + return NULL; + if (PyDict_DelItem(dialects, name_obj) < 0) + return PyErr_Format(error_obj, "unknown dialect"); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +csv_get_dialect(PyObject *module, PyObject *args) +{ + PyObject *name_obj; + + if (!PyArg_ParseTuple(args, "O", &name_obj)) + return NULL; + return get_dialect_from_registry(name_obj); +} + +/* + * MODULE + */ + +PyDoc_STRVAR(csv_module_doc, +"CSV parsing and writing.\n" +"\n" +"This module provides classes that assist in the reading and writing\n" +"of Comma Separated Value (CSV) files, and implements the interface\n" +"described by PEP 305. Although many CSV files are simple to parse,\n" +"the format is not formally defined by a stable specification and\n" +"is subtle enough that parsing lines of a CSV file with something\n" +"like line.split(\",\") is bound to fail. The module supports three\n" +"basic APIs: reading, writing, and registration of dialects.\n" +"\n" +"\n" +"DIALECT REGISTRATION:\n" +"\n" +"Readers and writers support a dialect argument, which is a convenient\n" +"handle on a group of settings. 
When the dialect argument is a string,\n" +"it identifies one of the dialects previously registered with the module.\n" +"If it is a class or instance, the attributes of the argument are used as\n" +"the settings for the reader or writer:\n" +"\n" +" class excel:\n" +" delimiter = ','\n" +" quotechar = '\"'\n" +" escapechar = None\n" +" doublequote = True\n" +" skipinitialspace = False\n" +" lineterminator = '\r\n'\n" +" quoting = QUOTE_MINIMAL\n" +"\n" +"SETTINGS:\n" +"\n" +" * quotechar - specifies a one-character string to use as the \n" +" quoting character. It defaults to '\"'.\n" +" * delimiter - specifies a one-character string to use as the \n" +" field separator. It defaults to ','.\n" +" * skipinitialspace - specifies how to interpret whitespace which\n" +" immediately follows a delimiter. It defaults to False, which\n" +" means that whitespace immediately following a delimiter is part\n" +" of the following field.\n" +" * lineterminator - specifies the character sequence which should \n" +" terminate rows.\n" +" * quoting - controls when quotes should be generated by the writer.\n" +" It can take on any of the following module constants:\n" +"\n" +" csv.QUOTE_MINIMAL means only when required, for example, when a\n" +" field contains either the quotechar or the delimiter\n" +" csv.QUOTE_ALL means that quotes are always placed around fields.\n" +" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" +" fields which contain characters other than [+-0-9.].\n" +" csv.QUOTE_NONE means that quotes are never placed around fields.\n" +" * escapechar - specifies a one-character string used to escape \n" +" the delimiter when quoting is set to QUOTE_NONE.\n" +" * doublequote - controls the handling of quotes inside fields. 
When\n" +" True, two consecutive quotes are interpreted as one during read,\n" +" and when writing, each quote character embedded in the data is\n" +" written as two quotes\n"); + +PyDoc_STRVAR(csv_reader_doc, +" csv_reader = reader(iterable [, dialect='excel']\n" +" [optional keyword args])\n" +" for row in csv_reader:\n" +" process(row)\n" +"\n" +"The \"iterable\" argument can be any object that returns a line\n" +"of input for each iteration, such as a file object or a list. The\n" +"optional \"dialect\" parameter is discussed below. The function\n" +"also accepts optional keyword arguments which override settings\n" +"provided by the dialect.\n" +"\n" +"The returned object is an iterator. Each iteration returns a row\n" + "of the CSV file (which can span multiple input lines):\n"); + +PyDoc_STRVAR(csv_writer_doc, +" csv_writer = csv.writer(fileobj [, dialect='excel']\n" +" [optional keyword args])\n" +" for row in csv_writer:\n" +" csv_writer.writerow(row)\n" +"\n" +" [or]\n" +"\n" +" csv_writer = csv.writer(fileobj [, dialect='excel']\n" +" [optional keyword args])\n" +" csv_writer.writerows(rows)\n" +"\n" +"The \"fileobj\" argument can be any object that supports the file API.\n"); + +PyDoc_STRVAR(csv_list_dialects_doc, +"Return a list of all know dialect names.\n" +" names = csv.list_dialects()"); + +PyDoc_STRVAR(csv_get_dialect_doc, +"Return the dialect instance associated with name.\n" +" dialect = csv.get_dialect(name)"); + +PyDoc_STRVAR(csv_register_dialect_doc, +"Create a mapping from a string name to a dialect class.\n" +" dialect = csv.register_dialect(name, dialect)"); + +PyDoc_STRVAR(csv_unregister_dialect_doc, +"Delete the name/dialect mapping associated with a string name.\n" +" csv.unregister_dialect(name)"); + +static struct PyMethodDef csv_methods[] = { + { "reader", (PyCFunction)csv_reader, + METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, + { "writer", (PyCFunction)csv_writer, + METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, + { 
"list_dialects", (PyCFunction)csv_list_dialects, + METH_NOARGS, csv_list_dialects_doc}, + { "register_dialect", (PyCFunction)csv_register_dialect, + METH_VARARGS, csv_register_dialect_doc}, + { "unregister_dialect", (PyCFunction)csv_unregister_dialect, + METH_VARARGS, csv_unregister_dialect_doc}, + { "get_dialect", (PyCFunction)csv_get_dialect, + METH_VARARGS, csv_get_dialect_doc}, + { NULL, NULL } +}; + +PyMODINIT_FUNC +init_csv(void) +{ + PyObject *module; + PyObject *rev; + PyObject *v; + int res; + StyleDesc *style; + + if (PyType_Ready(&Dialect_Type) < 0) + return; + + if (PyType_Ready(&Reader_Type) < 0) + return; + + if (PyType_Ready(&Writer_Type) < 0) + return; + + /* Create the module and add the functions */ + module = Py_InitModule3("_csv", csv_methods, csv_module_doc); + if (module == NULL) + return; + + /* Add version to the module. */ + rev = PyString_FromString("1.0"); + if (rev == NULL) + return; + if (PyModule_AddObject(module, "__version__", rev) < 0) + return; + + /* Add _dialects dictionary */ + dialects = PyDict_New(); + if (dialects == NULL) + return; + if (PyModule_AddObject(module, "_dialects", dialects)) + return; + + /* Add quote styles into dictionary */ + for (style = quote_styles; style->name; style++) { + v = PyInt_FromLong(style->style); + if (v == NULL) + return; + res = PyModule_AddObject(module, style->name, v); + if (res < 0) + return; + } + + /* Add the Dialect type */ + if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) + return; + + /* Add the CSV exception object to the module. */ + error_obj = PyErr_NewException("_csv.Error", NULL, NULL); + if (error_obj == NULL) + return; + PyModule_AddObject(module, "Error", error_obj); +}