SF Patch #744104: Remove eval() from csv
Eliminates the eval() step in the csv module, resulting in better security, more clarity, and a small speedup. The idea is to make successive attempts to coerce the string to a Python type: int(s), long(s), float(s), etc. As a by-product, this also eliminates a bare 'except' statement.
This commit is contained in:
parent
1546bc43fe
commit
39a5592001
35
Lib/csv.py
35
Lib/csv.py
|
@ -148,6 +148,11 @@ class DictWriter:
|
||||||
rows.append(self._dict_to_list(rowdict))
|
rows.append(self._dict_to_list(rowdict))
|
||||||
return self.writer.writerows(rows)
|
return self.writer.writerows(rows)
|
||||||
|
|
||||||
|
# Guard Sniffer's type checking against builds that exclude complex()
|
||||||
|
try:
|
||||||
|
complex
|
||||||
|
except NameError:
|
||||||
|
complex = float
|
||||||
|
|
||||||
class Sniffer:
|
class Sniffer:
|
||||||
'''
|
'''
|
||||||
|
@ -360,13 +365,6 @@ class Sniffer:
|
||||||
# Finally, a 'vote' is taken at the end for each column, adding or
|
# Finally, a 'vote' is taken at the end for each column, adding or
|
||||||
# subtracting from the likelihood of the first row being a header.
|
# subtracting from the likelihood of the first row being a header.
|
||||||
|
|
||||||
def seval(item):
|
|
||||||
"""
|
|
||||||
Strips parens from item prior to calling eval in an
|
|
||||||
attempt to make it safer
|
|
||||||
"""
|
|
||||||
return eval(item.replace('(', '').replace(')', ''))
|
|
||||||
|
|
||||||
rdr = reader(StringIO(sample), self.sniff(sample))
|
rdr = reader(StringIO(sample), self.sniff(sample))
|
||||||
|
|
||||||
header = rdr.next() # assume first row is header
|
header = rdr.next() # assume first row is header
|
||||||
|
@ -386,18 +384,21 @@ class Sniffer:
|
||||||
continue # skip rows that have irregular number of columns
|
continue # skip rows that have irregular number of columns
|
||||||
|
|
||||||
for col in columnTypes.keys():
|
for col in columnTypes.keys():
|
||||||
try:
|
|
||||||
|
for thisType in [int, long, float, complex]:
|
||||||
try:
|
try:
|
||||||
# is it a built-in type (besides string)?
|
thisType(row[col])
|
||||||
thisType = type(seval(row[col]))
|
break
|
||||||
except OverflowError:
|
except ValueError, OverflowError:
|
||||||
# a long int?
|
pass
|
||||||
thisType = type(seval(row[col] + 'L'))
|
else:
|
||||||
thisType = type(0) # treat long ints as int
|
|
||||||
except:
|
|
||||||
# fallback to length of string
|
# fallback to length of string
|
||||||
thisType = len(row[col])
|
thisType = len(row[col])
|
||||||
|
|
||||||
|
# treat longs as ints
|
||||||
|
if thisType == long:
|
||||||
|
thisType = int
|
||||||
|
|
||||||
if thisType != columnTypes[col]:
|
if thisType != columnTypes[col]:
|
||||||
if columnTypes[col] is None: # add new column type
|
if columnTypes[col] is None: # add new column type
|
||||||
columnTypes[col] = thisType
|
columnTypes[col] = thisType
|
||||||
|
@ -417,8 +418,8 @@ class Sniffer:
|
||||||
hasHeader -= 1
|
hasHeader -= 1
|
||||||
else: # attempt typecast
|
else: # attempt typecast
|
||||||
try:
|
try:
|
||||||
eval("%s(%s)" % (colType.__name__, header[col]))
|
colType(header[col])
|
||||||
except:
|
except ValueError, TypeError:
|
||||||
hasHeader += 1
|
hasHeader += 1
|
||||||
else:
|
else:
|
||||||
hasHeader -= 1
|
hasHeader -= 1
|
||||||
|
|
Loading…
Reference in New Issue