#18155: Regex-escape delimiter, in case it is a regex special char.
Patch by Vajrasky Kok, with slight modification to the tests by me.
This commit is contained in:
parent
e4e530e7e8
commit
925a322570
|
@ -264,8 +264,9 @@ class Sniffer:
|
||||||
|
|
||||||
# if we see an extra quote between delimiters, we've got a
|
# if we see an extra quote between delimiters, we've got a
|
||||||
# double quoted format
|
# double quoted format
|
||||||
dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
|
dq_regexp = re.compile(
|
||||||
{'delim':delim, 'quote':quotechar}, re.MULTILINE)
|
r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
|
||||||
|
{'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -796,7 +796,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
|
||||||
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
|
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
|
||||||
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
|
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
|
||||||
"""
|
"""
|
||||||
header = '''\
|
header1 = '''\
|
||||||
"venue","city","state","date","performers"
|
"venue","city","state","date","performers"
|
||||||
'''
|
'''
|
||||||
sample3 = '''\
|
sample3 = '''\
|
||||||
|
@ -815,10 +815,35 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
|
||||||
sample6 = "a|b|c\r\nd|e|f\r\n"
|
sample6 = "a|b|c\r\nd|e|f\r\n"
|
||||||
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
|
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
|
||||||
|
|
||||||
|
# Issue 18155: Use a delimiter that is a special char to regex:
|
||||||
|
|
||||||
|
header2 = '''\
|
||||||
|
"venue"+"city"+"state"+"date"+"performers"
|
||||||
|
'''
|
||||||
|
sample8 = """\
|
||||||
|
Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
|
||||||
|
Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
|
||||||
|
Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
|
||||||
|
Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
|
||||||
|
"""
|
||||||
|
sample9 = """\
|
||||||
|
'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
|
||||||
|
'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
|
||||||
|
'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
|
||||||
|
'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
|
||||||
|
"""
|
||||||
|
|
||||||
def test_has_header(self):
|
def test_has_header(self):
|
||||||
sniffer = csv.Sniffer()
|
sniffer = csv.Sniffer()
|
||||||
self.assertEqual(sniffer.has_header(self.sample1), False)
|
self.assertEqual(sniffer.has_header(self.sample1), False)
|
||||||
self.assertEqual(sniffer.has_header(self.header+self.sample1), True)
|
self.assertEqual(sniffer.has_header(self.header1 + self.sample1),
|
||||||
|
True)
|
||||||
|
|
||||||
|
def test_has_header_regex_special_delimiter(self):
|
||||||
|
sniffer = csv.Sniffer()
|
||||||
|
self.assertEqual(sniffer.has_header(self.sample8), False)
|
||||||
|
self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
|
||||||
|
True)
|
||||||
|
|
||||||
def test_sniff(self):
|
def test_sniff(self):
|
||||||
sniffer = csv.Sniffer()
|
sniffer = csv.Sniffer()
|
||||||
|
@ -852,13 +877,24 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
|
||||||
dialect = sniffer.sniff(self.sample7)
|
dialect = sniffer.sniff(self.sample7)
|
||||||
self.assertEqual(dialect.delimiter, "|")
|
self.assertEqual(dialect.delimiter, "|")
|
||||||
self.assertEqual(dialect.quotechar, "'")
|
self.assertEqual(dialect.quotechar, "'")
|
||||||
|
dialect = sniffer.sniff(self.sample8)
|
||||||
|
self.assertEqual(dialect.delimiter, '+')
|
||||||
|
dialect = sniffer.sniff(self.sample9)
|
||||||
|
self.assertEqual(dialect.delimiter, '+')
|
||||||
|
self.assertEqual(dialect.quotechar, "'")
|
||||||
|
|
||||||
def test_doublequote(self):
|
def test_doublequote(self):
|
||||||
sniffer = csv.Sniffer()
|
sniffer = csv.Sniffer()
|
||||||
dialect = sniffer.sniff(self.header)
|
dialect = sniffer.sniff(self.header1)
|
||||||
|
self.assertFalse(dialect.doublequote)
|
||||||
|
dialect = sniffer.sniff(self.header2)
|
||||||
self.assertFalse(dialect.doublequote)
|
self.assertFalse(dialect.doublequote)
|
||||||
dialect = sniffer.sniff(self.sample2)
|
dialect = sniffer.sniff(self.sample2)
|
||||||
self.assertTrue(dialect.doublequote)
|
self.assertTrue(dialect.doublequote)
|
||||||
|
dialect = sniffer.sniff(self.sample8)
|
||||||
|
self.assertFalse(dialect.doublequote)
|
||||||
|
dialect = sniffer.sniff(self.sample9)
|
||||||
|
self.assertTrue(dialect.doublequote)
|
||||||
|
|
||||||
if not hasattr(sys, "gettotalrefcount"):
|
if not hasattr(sys, "gettotalrefcount"):
|
||||||
if support.verbose: print("*** skipping leakage tests ***")
|
if support.verbose: print("*** skipping leakage tests ***")
|
||||||
|
|
|
@ -656,6 +656,7 @@ Kubilay Kocak
|
||||||
Greg Kochanski
|
Greg Kochanski
|
||||||
Damon Kohler
|
Damon Kohler
|
||||||
Marko Kohtala
|
Marko Kohtala
|
||||||
|
Vajrasky Kok
|
||||||
Guido Kollerie
|
Guido Kollerie
|
||||||
Jacek Konieczny
|
Jacek Konieczny
|
||||||
Марк Коренберг
|
Марк Коренберг
|
||||||
|
|
|
@ -38,6 +38,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #18155: The csv module now correctly handles csv files that use
|
||||||
|
a delimiter character that has a special meaning in regexes, instead of
|
||||||
|
throwing an exception.
|
||||||
|
|
||||||
- Issue #14360: encode_quopri can now be successfully used as an encoder
|
- Issue #14360: encode_quopri can now be successfully used as an encoder
|
||||||
when constructing a MIMEApplication object.
|
when constructing a MIMEApplication object.
|
||||||
|
|
||||||
|
@ -50,7 +54,7 @@ Library
|
||||||
|
|
||||||
- Issue #18259: Declare sethostname in socketmodule.c for AIX
|
- Issue #18259: Declare sethostname in socketmodule.c for AIX
|
||||||
|
|
||||||
- Issue #18167: cgi.FieldStorage no more fails to handle multipart/form-data
|
- Issue #18167: cgi.FieldStorage no longer fails to handle multipart/form-data
|
||||||
when \r\n appears at end of 65535 bytes without other newlines.
|
when \r\n appears at end of 65535 bytes without other newlines.
|
||||||
|
|
||||||
- subprocess: Prevent a possible double close of parent pipe fds when the
|
- subprocess: Prevent a possible double close of parent pipe fds when the
|
||||||
|
|
Loading…
Reference in New Issue