Fix for problem with Sniffer class. If your delimiter is whitespace and the
last field was empty, the Sniffer would strip the delimiter and incorrectly
guess that "" was the delimiter. Reported in c.l.py by Laurent Laporte. Will
backport.
This commit is contained in:
Skip Montanaro 2005-12-28 15:37:25 +00:00
parent e08fa29d0e
commit 91bb70c5c0
2 changed files with 5 additions and 2 deletions

View File

@ -271,7 +271,7 @@ class Sniffer:
for char in ascii: for char in ascii:
metaFrequency = charFrequency.get(char, {}) metaFrequency = charFrequency.get(char, {})
# must count even if frequency is 0 # must count even if frequency is 0
freq = line.strip().count(char) freq = line.count(char)
# value is the mode # value is the mode
metaFrequency[freq] = metaFrequency.get(freq, 0) + 1 metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
charFrequency[char] = metaFrequency charFrequency[char] = metaFrequency

View File

@ -836,7 +836,6 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow' 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back' 'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
""" """
header = '''\ header = '''\
"venue","city","state","date","performers" "venue","city","state","date","performers"
''' '''
@ -852,6 +851,8 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
47483648;43.0;170;abc;def 47483648;43.0;170;abc;def
''' '''
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
def test_has_header(self): def test_has_header(self):
sniffer = csv.Sniffer() sniffer = csv.Sniffer()
self.assertEqual(sniffer.has_header(self.sample1), False) self.assertEqual(sniffer.has_header(self.sample1), False)
@ -879,6 +880,8 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
self.assertEqual(dialect.delimiter, "/") self.assertEqual(dialect.delimiter, "/")
dialect = sniffer.sniff(self.sample4) dialect = sniffer.sniff(self.sample4)
self.assertEqual(dialect.delimiter, ";") self.assertEqual(dialect.delimiter, ";")
dialect = sniffer.sniff(self.sample5)
self.assertEqual(dialect.delimiter, "\t")
if not hasattr(sys, "gettotalrefcount"): if not hasattr(sys, "gettotalrefcount"):
if test_support.verbose: print "*** skipping leakage tests ***" if test_support.verbose: print "*** skipping leakage tests ***"