LogAnalyzer - binary log parsing preparations

- use FMT messages to create classes using type()
 - work with these classes instead of arrays
 - split parsing and processing
This commit is contained in:
Markus Koetter 2014-07-03 12:19:20 +02:00 committed by Craig Elder
parent ce0efdb7d2
commit 64ae2011b5
2 changed files with 176 additions and 102 deletions

View File

@ -13,6 +13,7 @@ import sys
class Format:
'''Data channel format as specified by the FMT lines in the log file'''
NAME = 'FMT'
msgType = 0
msgLen = 0
name = ""
@ -24,9 +25,72 @@ class Format:
self.name = name
self.types = types
self.labels = labels.split(',')
def __str__(self):
return "%8s %s" % (self.name, `self.labels`)
@staticmethod
def trycastToFormatType(value,valueType):
'''using format characters from libraries/DataFlash/DataFlash.h to cast strings to basic python int/float/string types
tries a cast, if it does not work, well, acceptable as the text logs do not match the format, e.g. MODE is expected to be int'''
try:
if valueType in "fcCeEL":
return float(value)
elif valueType in "bBhHiIM":
return int(value)
elif valueType in "nNZ":
return str(value)
except:
pass
return value
def to_class(self):
members = dict(
NAME = self.name,
labels = self.labels[:],
)
fieldformats = [i for i in self.types]
fieldnames = self.labels[:]
# field access
for (xname,xformat) in zip(fieldnames,fieldformats):
def createproperty(name, format):
# extra scope for variable sanity
# scaling via _NAME and def NAME(self): return self._NAME / SCALE
propertyname = name
attributename = '_' + name
p = property(lambda x:getattr(x, attributename),
lambda x, v:setattr(x,attributename, Format.trycastToFormatType(v,format)))
members[propertyname] = p
members[attributename] = None
createproperty(xname, xformat)
# repr shows all values but the header
members['__repr__'] = lambda x: "<{cls} {data}>".format(cls=x.__class__.__name__, data = ' '.join(["{}:{}".format(k,getattr(x,'_'+k)) for k in x.labels]))
def init(a, *x):
if len(x) != len(a.labels):
raise ValueError("Invalid Length")
#print(list(zip(a.labels, x)))
for (l,v) in zip(a.labels, x):
try:
setattr(a, l, v)
except Exception as e:
print("{} {} {} failed".format(a,l,v))
print(e)
members['__init__'] = init
# finally, create the class
cls = type(\
'Log__{:s}'.format(self.name),
(object,),
members
)
#print(members)
return cls
class Channel:
'''storage for a single stream of data, i.e. all GPS.RelAlt values'''
@ -263,24 +327,7 @@ class DataflashLog:
else:
return ""
def __castToFormatType(self,value,valueType):
'''using format characters from libraries/DataFlash/DataFlash.h to cast strings to basic python int/float/string types'''
intTypes = "bBhHiIM"
floatTypes = "fcCeEL"
charTypes = "nNZ"
if valueType in floatTypes:
return float(value)
elif valueType in intTypes:
return int(value)
elif valueType in charTypes:
return str(value)
else:
raise Exception("Unknown value type of '%s' specified to __castToFormatType()" % valueType)
#def __init__(self, logfile, ignoreBadlines=False):
#self.read(logfile, ignoreBadlines)
def read(self, logfile, ignoreBadlines=False):
def read(self, logfile, format, ignoreBadlines=False):
'''returns on successful log read (including bad lines if ignoreBadlines==True), will throw an Exception otherwise'''
# TODO: dataflash log parsing code is pretty hacky, should re-write more methodically
self.filename = logfile
@ -289,13 +336,111 @@ class DataflashLog:
else:
f = open(self.filename, 'r')
lineNumber = 0
knownHardwareTypes = ["APM", "PX4", "MPNG"]
numBytes = 0
for line in f:
if len(line) >= 4 and line[0:4] == '\xa3\x95\x80\x80':
raise Exception("Unable to parse binary log files at this time, will be added soon")
if format == 'bin':
head = '\xa3\x95\x80\x80'
elif format == 'log':
head = ""
elif format == 'auto':
if self.filename == '<stdin>':
# assuming TXT format
# raise ValueError("Invalid log format for stdin: {}".format(format))
head = ""
else:
head = f.read(4)
f.seek(0)
else:
raise ValueError("Unknown log format for {}: {}".format(self.logfile, format))
if head == '\xa3\x95\x80\x80':
# lineNumber = self.read_binary(f, ignoreBadlines)
pass
else:
numBytes, lineNumber = self.read_text(f, ignoreBadlines)
# gather some general stats about the log
self.lineCount = lineNumber
self.filesizeKB = numBytes / 1024.0
# TODO: switch duration calculation to use TimeMS values rather than GPS timestemp
if "GPS" in self.channels:
# the GPS time label changed at some point, need to handle both
timeLabel = "TimeMS"
if timeLabel not in self.channels["GPS"]:
timeLabel = "Time"
firstTimeGPS = self.channels["GPS"][timeLabel].listData[0][1]
lastTimeGPS = self.channels["GPS"][timeLabel].listData[-1][1]
self.durationSecs = (lastTimeGPS-firstTimeGPS) / 1000
# TODO: calculate logging rate based on timestamps
# ...
def process(self, lineNumber, e):
if e.NAME == 'FMT':
cls = e.to_class()
if hasattr(e, 'type') and e.type not in self._formats: # binary log specific
self._formats[e.type] = cls
if cls.NAME not in self.formats:
self.formats[cls.NAME] = cls
elif e.NAME == "PARM":
self.parameters[e.Name] = e.Value
elif e.NAME == "MSG":
if not self.vehicleType:
tokens = e.Message.split(' ')
vehicleTypes = ["ArduPlane", "ArduCopter", "ArduRover"]
self.vehicleType = tokens[0]
self.firmwareVersion = tokens[1]
if len(tokens) == 3:
self.firmwareHash = tokens[2][1:-1]
else:
self.messages[lineNumber] = e.Message
elif e.NAME == "MODE":
if self.vehicleType == "ArduCopter":
try:
modes = {0:'STABILIZE',
1:'ACRO',
2:'ALT_HOLD',
3:'AUTO',
4:'GUIDED',
5:'LOITER',
6:'RTL',
7:'CIRCLE',
9:'LAND',
10:'OF_LOITER',
11:'DRIFT',
13:'SPORT',
14:'FLIP',
15:'AUTOTUNE',
16:'HYBRID',}
self.modeChanges[lineNumber] = (modes[int(e.Mode)], e.ThrCrs)
except:
self.modeChanges[lineNumber] = (e.Mode, e.ThrCrs)
elif self.vehicleType == "ArduPlane" or self.vehicleType == "ArduRover":
self.modeChanges[lineNumber] = (tokens[2],int(tokens[3]))
else:
raise Exception("Unknown log type for MODE line {} {}".format(self.vehicleType, repr(e)))
# anything else must be the log data
else:
groupName = e.NAME
# first time seeing this type of log line, create the channel storage
if not groupName in self.channels:
self.channels[groupName] = {}
for label in e.labels:
self.channels[groupName][label] = Channel()
# store each token in its relevant channel
for label in e.labels:
value = getattr(e, label)
channel = self.channels[groupName][label]
channel.dictData[lineNumber] = value
channel.listData.append((lineNumber, value))
def read_text(self, f, ignoreBadlines):
self.formats = {'FMT':Format}
lineNumber = 0
numBytes = 0
knownHardwareTypes = ["APM", "PX4", "MPNG"]
for line in f:
lineNumber = lineNumber + 1
numBytes += len(line) + 1
try:
@ -307,17 +452,6 @@ class DataflashLog:
continue
if line == "----------------------------------------": # present in pre-3.0 logs
raise Exception("Log file seems to be in the older format (prior to self-describing logs), which isn't supported")
# Some logs are missing the initial dataflash header which says the log index and the type of log, but we can catch the vehicle
# type here too in the MSG line
if not self.vehicleType and tokens[0] == "MSG":
tokens2 = line.split(' ')
vehicleTypes = ["ArduPlane", "ArduCopter", "ArduRover"]
if tokens2[1] in vehicleTypes and tokens2[2][0].lower() == "v":
self.vehicleType = tokens2[1]
self.firmwareVersion = tokens2[1]
if len(tokens2) == 3:
self.firmwareHash = tokens2[2][1:-1]
continue
if len(tokens) == 1:
tokens2 = line.split(' ')
if line == "":
@ -340,75 +474,14 @@ class DataflashLog:
self.skippedLines += 1
else:
raise Exception("")
# now handle the non-log data stuff, format descriptions, params, etc
elif tokens[0] == "FMT":
format = None
if len(tokens) == 6:
format = Format(tokens[1],tokens[2],tokens[3],tokens[4],tokens[5])
elif len(tokens) == 5: # some logs have FMT STRT with no labels
format = Format(tokens[1],tokens[2],tokens[3],tokens[4],"")
else:
raise Exception("FMT error, nTokens: %d" % len(tokens))
#print format # TEMP
self.formats[tokens[3]] = format
elif tokens[0] == "PARM":
pName = tokens[1]
self.parameters[pName] = float(tokens[2])
elif tokens[0] == "MSG":
self.messages[lineNumber] = tokens[1]
elif tokens[0] == "MODE":
if self.vehicleType == "ArduCopter":
self.modeChanges[lineNumber] = (tokens[1],int(tokens[2]))
elif self.vehicleType == "ArduPlane" or self.vehicleType == "ArduRover":
self.modeChanges[lineNumber] = (tokens[2],int(tokens[3]))
else:
raise Exception("Unknown log type for MODE line")
# anything else must be the log data
else:
groupName = tokens[0]
tokens2 = line.split(', ')
# first time seeing this type of log line, create the channel storage
if not groupName in self.channels:
self.channels[groupName] = {}
for label in self.formats[groupName].labels:
self.channels[groupName][label] = Channel()
# check the number of tokens matches between the line and what we're expecting from the FMT definition
if (len(tokens2)-1) != len(self.formats[groupName].labels):
errorMsg = "%s line's value count (%d) not matching FMT specification (%d) on line %d" % (groupName, len(tokens2)-1, len(self.formats[groupName].labels), lineNumber)
if ignoreBadlines:
print(errorMsg + " (skipping line)", file=sys.stderr)
self.skippedLines += 1
continue
else:
raise Exception(errorMsg)
# store each token in its relevant channel
for (index,label) in enumerate(self.formats[groupName].labels):
#value = float(tokens2[index+1]) # simple read without handling datatype
value = self.__castToFormatType(tokens2[index+1], self.formats[groupName].types[index]) # handling datatype via this call slows down ready by about 50%
channel = self.channels[groupName][label]
#print "Set data {%s,%s} for line %d to value %s, of type %c, stored at address %s" % (groupName, label, lineNumber, `value`, self.formats[groupName].types[index], hex(id(channel.dictData)))
channel.dictData[lineNumber] = value
channel.listData.append((lineNumber,value))
if not tokens[0] in self.formats:
raise ValueError("Unknown Format {}".format(tokens[0]))
e = self.formats[tokens[0]](*tokens[1:])
self.process(lineNumber, e)
except Exception as e:
print("BAD LINE: " + line, file=sys.stderr)
raise Exception("Error parsing line %d of log file %s - %s" % (lineNumber,self.filename,e.args[0]))
# gather some general stats about the log
self.lineCount = lineNumber
self.filesizeKB = numBytes / 1024.0 # For data that comes from a process pipe, filesize is not supported
# TODO: switch duration calculation to use TimeMS values rather than GPS timestemp
if "GPS" in self.channels:
# the GPS time label changed at some point, need to handle both
timeLabel = "TimeMS"
if timeLabel not in self.channels["GPS"]:
timeLabel = "Time"
firstTimeGPS = self.channels["GPS"][timeLabel].listData[0][1]
lastTimeGPS = self.channels["GPS"][timeLabel].listData[-1][1]
self.durationSecs = (lastTimeGPS-firstTimeGPS) / 1000
# TODO: calculate logging rate based on timestamps
# ...
return (numBytes,lineNumber)

View File

@ -210,6 +210,7 @@ def main():
# deal with command line arguments
parser = argparse.ArgumentParser(description='Analyze an APM Dataflash log for known issues')
parser.add_argument('logfile', type=argparse.FileType('r'), help='path to Dataflash log file (or - for stdin)')
parser.add_argument('-f', '--format', metavar='', type=str, action='store', choices=['bin','log','auto'], default='auto')
parser.add_argument('-q', '--quiet', metavar='', action='store_const', const=True, help='quiet mode, do not print results')
parser.add_argument('-p', '--profile', metavar='', action='store_const', const=True, help='output performance profiling data')
parser.add_argument('-s', '--skip_bad', metavar='', action='store_const', const=True, help='skip over corrupt dataflash lines')
@ -221,7 +222,7 @@ def main():
# load the log
startTime = time.time()
logdata = DataflashLog.DataflashLog()
logdata.read(args.logfile.name, ignoreBadlines=args.skip_bad) # read log
logdata.read(args.logfile.name, args.format, ignoreBadlines=args.skip_bad) # read log
endTime = time.time()
if args.profile:
print "Log file read time: %.2f seconds" % (endTime-startTime)