Fix a delimiter detection problem in the sniffer. Sniffing "a|b|c\r\n" was
returning 'a' as the delimiter. It now returns '|', but not because I understand any better what the code is supposed to do. Would someone who understands the idea behind _guess_delimiter() (see its docstring) check whether my fallback choice is actually better than before, or whether it's just serendipity that I picked the proper delimiter?
This commit is contained in:
parent
0174dddc65
commit
39b29be8a6
13
Lib/csv.py
13
Lib/csv.py
|
@ -152,10 +152,13 @@ class Sniffer:
|
|||
|
||||
quotechar, delimiter, skipinitialspace = \
|
||||
self._guess_quote_and_delimiter(sample, delimiters)
|
||||
if delimiter is None:
|
||||
if not delimiter:
|
||||
delimiter, skipinitialspace = self._guess_delimiter(sample,
|
||||
delimiters)
|
||||
|
||||
if not delimiter:
|
||||
raise Error, "Could not determine delimiter"
|
||||
|
||||
class dialect(Dialect):
|
||||
_name = "sniffed"
|
||||
lineterminator = '\r\n'
|
||||
|
@ -329,8 +332,12 @@ class Sniffer:
|
|||
data[0].count("%c " % d))
|
||||
return (d, skipinitialspace)
|
||||
|
||||
# finally, just return the first damn character in the list
|
||||
delim = delims.keys()[0]
|
||||
# nothing else indicates a preference, pick the character that
|
||||
# dominates(?)
|
||||
items = [(v,k) for (k,v) in delims.items()]
|
||||
items.sort()
|
||||
delim = items[-1][1]
|
||||
|
||||
skipinitialspace = (data[0].count(delim) ==
|
||||
data[0].count("%c " % delim))
|
||||
return (delim, skipinitialspace)
|
||||
|
|
|
@ -852,6 +852,8 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
|
|||
'''
|
||||
|
||||
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
|
||||
sample6 = "a|b|c\r\nd|e|f\r\n"
|
||||
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
|
||||
|
||||
def test_has_header(self):
|
||||
sniffer = csv.Sniffer()
|
||||
|
@ -882,6 +884,11 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
|
|||
self.assertEqual(dialect.delimiter, ";")
|
||||
dialect = sniffer.sniff(self.sample5)
|
||||
self.assertEqual(dialect.delimiter, "\t")
|
||||
dialect = sniffer.sniff(self.sample6)
|
||||
self.assertEqual(dialect.delimiter, "|")
|
||||
dialect = sniffer.sniff(self.sample7)
|
||||
self.assertEqual(dialect.delimiter, "|")
|
||||
self.assertEqual(dialect.quotechar, "'")
|
||||
|
||||
if not hasattr(sys, "gettotalrefcount"):
|
||||
if test_support.verbose: print "*** skipping leakage tests ***"
|
||||
|
|
Loading…
Reference in New Issue