Tools/px_process_module_doc.py: add script to extract documentation

And turn it into a markdown page.
It also does some simple validation, to check that the getopt() arguments
match the documentation.
This commit is contained in:
Beat Küng 2017-05-02 16:05:15 +02:00
parent c22b79ece5
commit 6c9574b336
6 changed files with 646 additions and 0 deletions

View File

@ -0,0 +1 @@
This folder contains the Python library used by px_process_module_doc.py.

View File

@ -0,0 +1 @@
# Submodules exported by "from px4moduledoc import *". The markdownout module
# is listed as well, because px_process_module_doc.py imports it from this
# package together with the scanner and the parser.
__all__ = ["srcscanner", "srcparser", "markdownout"]

View File

@ -0,0 +1,43 @@
from xml.sax.saxutils import escape
import codecs
import os
class MarkdownTablesOutput():
    """
    Renders grouped module documentation as one markdown page.

    The complete page text is assembled in the constructor and stored in
    ``self.output``; ``Save`` writes that text to ``modules.md``.
    """

    def __init__(self, module_groups):
        """
        :param module_groups: dict mapping a category name to a list of module
                              objects providing name(), scope(),
                              documentation() and usage_string()
        """
        # fixed page introduction, followed by one section per category
        chunks = [
            "\n"
            "# Modules & Commands Reference\n"
            "Documentation of PX4 modules, drivers and commands. It describes the provided\n"
            "functionality, high-level implementation overview and how to use the\n"
            "command-line interface.\n"
            "> **Note** **This list is auto-generated from the source code** and contains the\n"
            "> most recent modules documentation.\n"
            "This is not a complete list and NuttX provides some additional commands\n"
            "as well (such as `free`). Use `help` on the console to get a list of all\n"
            "available commands, and in most cases `command help` will print the usage.\n"
            "Since this is generated from source, errors must be reported/fixed\n"
            "in the [Firmware](https://github.com/PX4/Firmware) repository.\n"
        ]

        for category_name in sorted(module_groups):
            chunks.append("# Category: %s\n" % category_name.capitalize())
            for entry in module_groups[category_name]:
                src_scope = entry.scope()
                chunks.append("## %s\n" % entry.name())
                chunks.append(
                    "Source: [%s](https://github.com/PX4/Firmware/tree/master/src/%s)\n\n"
                    % (src_scope, src_scope))

                # the description and the usage block are both optional
                description = entry.documentation()
                if description:
                    chunks.append("%s\n" % description)
                usage = entry.usage_string()
                if usage:
                    chunks.append("### Usage\n```\n%s\n```\n" % usage)

        self.output = "".join(chunks)

    def Save(self, dirname):
        """
        Write the generated page as UTF-8 to ``modules.md`` inside dirname.
        """
        target = os.path.join(dirname, 'modules.md')
        with codecs.open(target, 'w', 'utf-8') as stream:
            stream.write(self.output)

View File

@ -0,0 +1,459 @@
import sys
import re
import math
import textwrap
class ModuleDocumentation(object):
    """
    Documentation for a single module.

    Built from the ordered list of PRINT_MODULE_* calls extracted from one
    source file. Each call is dispatched to the ``_handle_<name>`` method
    with the lower-cased function name, which fills in the module name,
    category, description and the usage text.
    """

    valid_categories = ['driver', 'estimator', 'controller', 'system',
                        'communication', 'command']

    max_line_length = 80  # wrap lines that are longer than this

    def __init__(self, function_calls, scope):
        """
        :param function_calls: list of tuples (function_name, [str(arg)])
        :param scope: source location identifier, returned by scope()
        :raises Exception: if a function name has no matching handler
        """
        self._name = ''
        self._category = ''
        self._doc_string = ''
        self._usage_string = ''
        self._first_command = True
        self._scope = scope

        self._options = ''  # all option chars
        self._all_values = []  # list of all values
        self._all_commands = []

        for func_name, args in function_calls:
            # Look the handler up separately from calling it, so that an
            # AttributeError raised inside a handler is not misreported as
            # an unknown function.
            handler = getattr(self, '_handle_' + func_name.lower(), None)
            if handler is None:
                raise Exception('unhandled function: PRINT_MODULE_' + func_name)
            handler(args)

        # 17 = number of added spaces on continued usage lines
        self._usage_string = self._wrap_long_lines(self._usage_string, 17)

    def _handle_description(self, args):
        """Store the module description. args: [description]"""
        assert(len(args) == 1)  # description
        self._doc_string = self._get_string(args[0])

    def _handle_usage_name(self, args):
        """Start the usage text of a module with sub-commands.

        args: [executable_name, category]
        """
        assert(len(args) == 2)  # executable_name, category
        self._name = self._get_string(args[0])
        self._category = self._get_string(args[1])

        self._usage_string = "%s <command> [arguments...]\n" % self._name
        self._usage_string += " Commands:\n"

    def _handle_usage_name_simple(self, args):
        """Start the usage text of a module without sub-commands.

        args: [executable_name, category]
        """
        assert(len(args) == 2)  # executable_name, category
        self._name = self._get_string(args[0])
        self._category = self._get_string(args[1])

        self._usage_string = "%s [arguments...]\n" % self._name

    def _handle_usage_command_descr(self, args):
        """Add a command to the usage text. args: [name, description]

        The description is only printed if it is a string literal (it may
        also be a keyword such as nullptr).
        """
        assert(len(args) == 2)  # name, description
        name = self._get_string(args[0])
        self._all_commands.append(name)

        # separate consecutive commands with an empty line
        if self._first_command:
            self._first_command = False
        else:
            self._usage_string += "\n"

        if self._is_string(args[1]):
            description = self._get_string(args[1])
            self._usage_string += " %-13s %s\n" % (name, description)
        else:
            self._usage_string += " %s\n" % name

    def _handle_usage_command(self, args):
        """Add a command without description. args: [name]"""
        assert(len(args) == 1)  # name
        # build a new list instead of appending, so the caller's argument
        # list is left untouched
        self._handle_usage_command_descr(list(args) + ['nullptr'])

    def _handle_usage_default_commands(self, args):
        """Add the 'stop' and 'status' commands. args: []"""
        assert(len(args) == 0)
        self._handle_usage_command(['"stop"'])
        self._handle_usage_command_descr(['"status"', '"print status info"'])

    def _handle_usage_param_int(self, args):
        """Add an integer option.

        args: [option_char, default_val, min_val, max_val, description,
               is_optional]
        The default value is only printed for optional parameters.
        """
        assert(len(args) == 6)  # option_char, default_val, min_val, max_val, description, is_optional
        option_char = self._get_option_char(args[0])
        default_val = int(args[1])
        description = self._get_string(args[4])
        if self._is_bool_true(args[5]):
            self._usage_string += " [-%s <val>] %s\n" % (option_char, description)
            self._usage_string += " default: %i\n" % default_val
        else:
            self._usage_string += " -%s <val> %s\n" % (option_char, description)

    def _handle_usage_param_float(self, args):
        """Add a float option.

        args: [option_char, default_val, min_val, max_val, description,
               is_optional]
        The default value is only printed for optional parameters.
        """
        assert(len(args) == 6)  # option_char, default_val, min_val, max_val, description, is_optional
        option_char = self._get_option_char(args[0])
        default_val = self._get_float(args[1])
        description = self._get_string(args[4])
        if self._is_bool_true(args[5]):
            self._usage_string += " [-%s <val>] %s\n" % (option_char, description)
            self._usage_string += " default: %.1f\n" % default_val
        else:
            self._usage_string += " -%s <val> %s\n" % (option_char, description)

    def _handle_usage_param_flag(self, args):
        """Add a flag option. args: [option_char, description, is_optional]"""
        assert(len(args) == 3)  # option_char, description, is_optional
        option_char = self._get_option_char(args[0])
        description = self._get_string(args[1])
        if self._is_bool_true(args[2]):
            self._usage_string += " [-%c] %s\n" % (option_char, description)
        else:
            self._usage_string += " -%c %s\n" % (option_char, description)

    def _handle_usage_param_string(self, args):
        """Add a string option.

        args: [option_char, default_val, values, description, is_optional]
        The values and the default are only printed if they are string
        literals.
        """
        assert(len(args) == 5)  # option_char, default_val, values, description, is_optional
        option_char = self._get_option_char(args[0])
        description = self._get_string(args[3])
        if self._is_bool_true(args[4]):
            self._usage_string += " [-%c <val>] %s\n" % (option_char, description)
        else:
            self._usage_string += " -%c <val> %s\n" % (option_char, description)

        if self._is_string(args[2]):
            values = self._get_string(args[2])
            self._all_values.append(values)
            if self._is_string(args[1]):
                default_val = self._get_string(args[1])
                self._usage_string += " values: %s, default: %s\n" % (values, default_val)
            else:
                self._usage_string += " values: %s\n" % values
        else:
            if self._is_string(args[1]):
                default_val = self._get_string(args[1])
                self._usage_string += " default: %s\n" % default_val

    def _handle_usage_param_comment(self, args):
        """Add a free-text comment to the usage text. args: [comment]"""
        assert(len(args) == 1)  # comment
        comment = self._get_string(args[0])
        self._usage_string += self._wrap_long_lines("\n %s\n" % comment, 1)

    def _handle_usage_arg(self, args):
        """Add a positional argument. args: [values, description, is_optional]"""
        assert(len(args) == 3)  # values, description, is_optional
        values = self._get_string(args[0])
        self._all_values.append(values)
        description = self._get_string(args[1])
        if self._is_bool_true(args[2]):
            values += ']'
            self._usage_string += " [%-10s %s\n" % (values, description)
        else:
            self._usage_string += " %-11s %s\n" % (values, description)

    def _get_string(self, string):
        """Return the content of a quoted string argument."""
        return string[1:-1]  # remove the " at start & end

    def _get_float(self, string):
        """Convert a C float literal such as 1.5f (or 1.5F) to a float."""
        # slicing (instead of indexing) keeps an empty string from raising
        # IndexError; float() then reports the invalid value
        if string[-1:] in ('f', 'F'):
            string = string[:-1]
        return float(string)

    def _is_string(self, argument):
        """Return True if the argument starts with a double quote."""
        return len(argument) > 0 and argument[0] == '"'

    def _is_bool_true(self, argument):
        """Return True if the argument is the keyword 'true'."""
        return len(argument) > 0 and argument == 'true'

    def _get_option_char(self, argument):
        """Extract the option char from a char literal and record it."""
        assert(len(argument) == 3)  # must have the form: 'p' (assume there's no escaping)
        option_char = argument[1]
        self._options += option_char
        return option_char

    def _wrap_long_lines(self, string, indentation_spaces):
        """
        wrap long lines in a string
        :param indentation_spaces: number of added spaces on continued lines
        """
        indent = ' ' * indentation_spaces
        return ''.join(
            textwrap.fill(line, self.max_line_length,
                          subsequent_indent=indent) + '\n'
            for line in string.splitlines())

    def name(self):
        """get the module (executable) name"""
        return self._name

    def category(self):
        """get the module category"""
        return self._category

    def scope(self):
        """get the scope given to the constructor"""
        return self._scope

    def documentation(self):
        """
        get the module description, with '$ cmd' lines turned into markdown
        code blocks
        """
        doc_string = self._doc_string

        # convert ' $ cmd' commands into code blocks
        # use lookahead (?=...) so the multiple consecutive command lines work
        doc_string = re.sub(r"\n\$ (.*)(?=\n)", r"\n```\n\1\n```", doc_string)
        # now merge consecutive blocks
        doc_string = re.sub(r"\n```\n```\n", r"\n", doc_string)

        return doc_string

    def usage_string(self):
        """
        get the usage text without trailing newlines
        """
        return self._usage_string.rstrip('\n')

    def options(self):
        """
        get all the -p options as string of chars
        """
        return self._options

    def all_values(self):
        """
        get a list of all command values
        """
        return self._all_values

    def all_commands(self):
        """
        get a list of all commands
        """
        return self._all_commands
class SourceParser(object):
    """
    Parses provided source file contents and stores the documentation of all
    found modules internally.
    """

    re_doc_definition = re.compile(r'PRINT_MODULE_([A-Z_]*)\s*\(')

    # separators ending a keyword/number/char argument: \n, ',', ')', //, /*
    _re_arg_separator = re.compile(r"\n|,|\)|//|/\*")

    # two-character C escape sequences and their replacement. Sequences that
    # are not listed are kept verbatim (backslash included).
    _simple_escapes = {
        'n': '\n', 't': '\t', 'r': '\r', '0': '\0',
        'a': '\a', 'b': '\b', 'f': '\f', 'v': '\v',
        '\\': '\\', '"': '"', "'": "'", '?': '?',
    }

    def __init__(self):
        self._modules = {}  # all found modules: key is the module name

    def Parse(self, scope, contents):
        """
        Incrementally parse program contents and append all found documentations
        to the list.

        :param scope: identifier of the source location, passed on to the
                      module documentation and used in messages
        :param contents: complete text of one source file
        :return: always True
        :raises Exception: if the module name is missing or the category is
                           not one of ModuleDocumentation.valid_categories
        """
        extracted_function_calls = []  # list of tuples: (FUNC_NAME, list(ARGS))

        start_index = 0
        while start_index < len(contents):
            # skip whitespace
            while start_index < len(contents) and contents[start_index] in [' ', '\t']:
                start_index += 1

            end_index = contents.find('\n', start_index)
            if end_index == -1:
                end_index = len(contents)
            line = contents[start_index:end_index]

            # Ignore empty lines and macro #if's
            if line == "" or line.startswith('#if'):
                start_index = end_index + 1
                continue

            m = self.re_doc_definition.match(contents, start_index, end_index)
            if m:
                func_name = m.group(1)
                end_index_match = m.span()[1]
                next_start_index, arguments = self._parse_arguments(contents, end_index_match)
                extracted_function_calls.append((func_name, arguments))

                # continue after the call, which may span several lines
                start_index = end_index + 1
                if next_start_index > start_index:
                    start_index = next_start_index
                continue

            start_index = end_index + 1

        if len(extracted_function_calls) > 0:
            # add the module to the dict
            module_doc = ModuleDocumentation(extracted_function_calls, scope)

            if module_doc.name() == '':
                raise Exception('PRINT_MODULE_USAGE_NAME not given for ' + scope)
            if not module_doc.category() in ModuleDocumentation.valid_categories:
                raise Exception('Invalid/unknown category ' +
                                module_doc.category() + ' for ' + scope)

            self._do_consistency_check(contents, scope, module_doc)

            self._modules[module_doc.name()] = module_doc

        return True

    def _do_consistency_check(self, contents, scope, module_doc):
        """
        check the documentation for consistency with the code (arguments to
        getopt() and others). This is only approximative, but should catch cases
        where an option was added and not documented.

        Inconsistencies are reported as warnings on stdout.
        """
        # search all option chars in getopt() calls, combine them & compare
        # against the documented set
        getopt_matches = re.findall(r"\b(px4_|)getopt\b.*\"([a-zA-Z:]+)\"", contents)

        # there could be several getopt calls and it is not simple to find which
        # command it belongs to, so combine all into a single string
        getopt_args = ''.join(match[1] for match in getopt_matches).replace(':', '')
        documented_args = module_doc.options()

        # some modules don't use getopt or parse the options in another file,
        # so only check if both lists are not empty
        if len(getopt_args) > 0 and len(documented_args) > 0:
            # compare as sets: the order and duplicates are irrelevant
            getopt_set = set(getopt_args)
            documented_set = set(documented_args)
            if getopt_set != documented_set:
                # an option that is parsed but not documented is always an error
                failed = len(getopt_set - documented_set) > 0

                # a documented option that getopt() does not know is accepted
                # if strcmp(..."-x"... is used instead
                for c in sorted(documented_set - getopt_set):
                    pattern = r"\bstrcmp\b.*\"-" + re.escape(c) + r"\""
                    if re.search(pattern, contents) is None:
                        failed = True

                if failed:
                    print("Warning: documentation inconsistency in %s:" % scope)
                    print(" Documented options : %s" % ''.join(sorted(documented_set)))
                    print(" Options found in getopt(): %s" % ''.join(sorted(getopt_set)))

        # now check the commands: search for strcmp(argv[i], "command".
        # this will also find the value arguments, so append them too to the
        # module doc strings
        commands = re.findall(r"\bstrcmp\b.*argv\[.*\"(.+)\"", contents) + \
            re.findall(r"\bstrcmp\b.*\"(.+)\".*argv\[", contents) + \
            re.findall(r"\bstrcmp\b.*\bverb\b.*\"(.+)\"", contents)

        doc_commands = set(module_doc.all_commands())
        for value in module_doc.all_values():
            doc_commands.update(value.split('|'))

        for command in commands:
            if len(command) == 2 and command[0] == '-':
                continue  # skip options

            if command in ['start', 'stop', 'status']:
                continue  # handled in the base class

            if command not in doc_commands:
                print("Warning: undocumented command '%s' in %s" % (command, scope))

    def _parse_arguments(self, contents, start_index):
        """
        parse function arguments into a list and return a tuple with (index, [str(args)])
        where the index points to the start of the next line (or is -1 if
        no further line follows).

        example: contents[start_index:] may look like:
        'p', nullptr, "<topic_name>");
        [...]

        String arguments are returned enclosed in double quotes, with escape
        sequences resolved and adjacent literals ("a" "b") merged.

        :raises Exception: on unsupported or malformed argument lists
        """
        args = []
        length = len(contents)
        next_position = start_index
        current_string = ''
        while next_position < length:
            # skip whitespace
            while next_position < length and contents[next_position] in (' ', '\t', '\r', '\n'):
                next_position += 1
            if next_position >= length:
                break

            if contents[next_position] == '"':
                next_position += 1
                string = ''
                while next_position < length:
                    char = contents[next_position]
                    if char == '\\':  # escaping
                        # slicing yields '' for a backslash at the very end
                        escaped = contents[next_position + 1:next_position + 2]
                        if escaped != '\n':  # skip if continued on next line
                            string += self._simple_escapes.get(escaped, '\\' + escaped)
                        next_position += 2
                    elif char == '"':
                        next_position += 1
                        break
                    else:
                        string += char
                        next_position += 1
                # store the string, as it could continue in the form "a" "b"
                current_string += string

            elif contents.startswith('//', next_position):  # comment
                line_end = contents.find('\n', next_position)
                # an unterminated comment ends the input
                next_position = line_end if line_end != -1 else length
            elif contents.startswith('/*', next_position):  # comment
                comment_end = contents.find('*/', next_position)
                # an unterminated comment ends the input (jumping back to
                # find()+2 would never terminate)
                next_position = comment_end + 2 if comment_end != -1 else length
            else:
                # anything else ends a (possibly concatenated) string literal
                if current_string != '':
                    args.append('"' + current_string + '"')
                    current_string = ''

                if contents.startswith('R"', next_position):  # C++11 raw string literal
                    bracket = contents.find('(', next_position)
                    if bracket == -1:
                        raise Exception('parser error: malformed raw string literal')
                    identifier = contents[next_position + 2:bracket]
                    raw_string_end = contents.find(')' + identifier + '"', next_position)
                    if raw_string_end == -1:
                        raise Exception('parser error: unterminated raw string literal')
                    args.append('"' + contents[bracket + 1:raw_string_end] + '"')
                    # continue after the closing )identifier"
                    next_position = raw_string_end + len(identifier) + 2
                elif contents[next_position] == ')':
                    break  # finished
                elif contents[next_position] == ',':
                    next_position += 1  # skip
                elif contents[next_position] == '(':
                    raise Exception('parser error: unsupported "(" in function arguments')
                else:
                    # keyword (true, nullptr, ...), number or char (or variable).
                    # valid separators are: \n, ,, ), //, /*
                    m = self._re_arg_separator.search(contents, next_position)
                    if m is None:
                        raise Exception('parser error')
                    next_arg_pos = m.start()
                    args.append(contents[next_position:next_arg_pos].strip())
                    next_position = next_arg_pos

        # find the next line
        next_position = contents.find('\n', next_position)
        if next_position >= 0:
            next_position += 1
        return next_position, args

    def GetModuleGroups(self):
        """
        Returns a dictionary of all categories with a list of associated modules.
        The modules of each category are sorted by name.
        """
        groups = {}
        for module in self._modules.values():
            groups.setdefault(module.category(), []).append(module)

        # sort by module name
        return {category: sorted(modules, key=lambda x: x.name())
                for category, modules in groups.items()}

View File

@ -0,0 +1,46 @@
import os
import re
import codecs
import sys
class SourceScanner(object):
    """
    Traverses directory tree, reads all source files, and passes their contents
    to the Parser.
    """

    # matches everything up to and including the last "src/" path component
    # (paths are normalized to forward slashes before matching)
    _re_scope_prefix = re.compile(r"^(|.*/)src/")

    def ScanDir(self, srcdirs, parser):
        """
        Scans provided path and passes all found contents to the parser using
        parser.Parse method.

        :param srcdirs: list of directories that are traversed recursively
        :param parser: object with a Parse(scope, contents) method
        :return: False as soon as the parser rejects a file, True otherwise
        """
        extensions = (".cpp", ".c")
        for srcdir in srcdirs:
            for dirname, dirnames, filenames in os.walk(srcdir):
                # os.walk() yields entries in arbitrary order: sort them (the
                # directory list in place) to traverse in a reproducible order
                dirnames.sort()
                for filename in sorted(filenames):
                    if not filename.endswith(extensions):
                        continue
                    path = os.path.join(dirname, filename)
                    try:
                        if not self.ScanFile(path, parser):
                            return False
                    except Exception:
                        # name the offending file, then let the error propagate
                        print("Exception in file %s" % path)
                        raise
        return True

    def ScanFile(self, path, parser):
        """
        Scans provided file and passes its contents to the parser using
        parser.Parse method.

        The scope handed to the parser is the directory of the file relative
        to the current directory, with everything up to and including the
        last "src/" component removed. A file that cannot be read or decoded
        as UTF-8 is passed on with empty contents.

        :return: the result of parser.Parse
        """
        directory = os.path.dirname(os.path.relpath(path)).replace("\\", "/")
        scope = self._re_scope_prefix.sub("", directory)

        with codecs.open(path, 'r', 'utf-8') as f:
            try:
                contents = f.read()
            except (IOError, OSError, UnicodeError):
                # best effort: skip unreadable content, keep scanning
                contents = ''
                print('Failed reading file: %s, skipping content.' % path)
        return parser.Parse(scope, contents)

96
Tools/px_process_module_doc.py Executable file
View File

@ -0,0 +1,96 @@
#!/usr/bin/env python
############################################################################
#
# Copyright (C) 2017 PX4 Development Team. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name PX4 nor the names of its contributors may be
# used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
############################################################################
#
# PX4 module documentation processor (main executable file)
#
# This tool scans the PX4 source code for module documentation declared
# in the form PRINT_MODULE_* and converts it into Markdown output
#
from __future__ import print_function
import sys
import os
import argparse
from px4moduledoc import srcscanner, srcparser, markdownout
import re
import json
import codecs
def main():
    """
    Command-line entry point: scan the given source paths for module
    documentation and write it to the requested markdown directory.
    """
    # Command line definition
    cli = argparse.ArgumentParser(description="Process module documentation.")
    cli.add_argument("-s", "--src-path",
                     default=["../src"],
                     metavar="PATH",
                     nargs='*',
                     help="one or more paths to source files to scan for parameters")
    cli.add_argument("-m", "--markdown",
                     nargs='?',
                     const=".",
                     metavar="DIRECTORY",
                     help="Markdown output directory (default DIRECTORY: .)")
    cli.add_argument('-v', '--verbose', action='store_true', help="verbose output")
    options = cli.parse_args()

    # An output method is mandatory
    markdown_dir = options.markdown
    if not markdown_dir:
        print("Error: You need to specify at least one output method!")
        cli.print_usage()
        sys.exit(1)

    # Set up the scanner and the parser that collects the documentation
    source_scanner = srcscanner.SourceScanner()
    doc_parser = srcparser.SourceParser()

    # Walk the source directories and parse every file
    if options.verbose:
        print("Scanning source path " + str(options.src_path))
    scan_ok = source_scanner.ScanDir(options.src_path, doc_parser)
    if not scan_ok:
        sys.exit(1)

    groups = doc_parser.GetModuleGroups()

    # Write the markdown page
    if markdown_dir:
        page = markdownout.MarkdownTablesOutput(groups)
        page.Save(markdown_dir)


if __name__ == "__main__":
    main()