worst case analysis of stack usage (#7883)

* Makefile target "check_stack"
This commit is contained in:
Julien Lecoeur 2017-09-10 19:34:34 +02:00 committed by Daniel Agar
parent 24ed06156e
commit ceeae7587e
4 changed files with 422 additions and 0 deletions

View File

@ -363,6 +363,21 @@ cppcheck: posix_sitl_default
@cppcheck -i$(SRC_DIR)/src/examples --std=c++11 --std=c99 --std=posix --project=build_posix_sitl_default/compile_commands.json --xml-version=2 2> cppcheck-result.xml
@cppcheck-htmlreport --source-encoding=ascii --file=cppcheck-result.xml --report-dir=cppcheck --source-dir=$(SRC_DIR)/src/
check_stack: px4fmu-v3_default
@echo "Checking worst case stack usage with avstack.pl ..."
@echo " "
@cd build_px4fmu-v3_default/ && mkdir -p stack_usage && $(SRC_DIR)/Tools/stack_usage/avstack.pl `find . -name *.obj` > stack_usage/avstack_output.txt 2> stack_usage/avstack_errors.txt
@head -n 10 build_px4fmu-v3_default/stack_usage/avstack_output.txt | c++filt
@echo " "
@echo "Checking worst case stack usage with checkstack.pl ..."
@echo " "
@echo "Top 10:"
@cd build_px4fmu-v3_default/ && mkdir -p stack_usage && arm-none-eabi-objdump -d src/firmware/nuttx/firmware_nuttx | $(SRC_DIR)/Tools/stack_usage/checkstack.pl arm 0 > stack_usage/checkstack_output.txt 2> stack_usage/checkstack_errors.txt
@head -n 10 build_px4fmu-v3_default/stack_usage/checkstack_output.txt | c++filt
@echo " "
@echo "Symbols with 'main', 'thread' or 'task':"
@cat build_px4fmu-v3_default/stack_usage/checkstack_output.txt | c++filt | grep -E 'thread|main|task'
# Cleanup
# --------------------------------------------------------------------
.PHONY: clean submodulesclean distclean

251
Tools/stack_usage/avstack.pl Executable file
View File

@ -0,0 +1,251 @@
#!/usr/bin/perl -w
# avstack.pl: AVR stack checker
# Copyright (C) 2013 Daniel Beer <dlbeer@gmail.com>
#
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
# PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
# Usage
# -----
#
# This script requires that you compile your code with -fstack-usage.
# This results in GCC generating a .su file for each .o file. Once you
# have these, do:
#
# ./avstack.pl <object files>
#
# This will disassemble .o files to construct a call graph, and read
# frame size information from .su. The call graph is traced to find, for
# each function:
#
# - Call height: the maximum call height of any callee, plus 1
# (defined to be 1 for any function which has no callees).
#
# - Inherited frame: the maximum *inherited* frame of any callee, plus
# the GCC-calculated frame size of the function in question.
#
# Using these two pieces of information, we calculate a cost (estimated
# peak stack usage) for calling the function. Functions are then listed
# on stdout in decreasing order of cost.
#
# Functions which are recursive are marked with an 'R' to the left of
# them. Their cost is calculated for a single level of recursion.
#
# The peak stack usage of your entire program can usually be estimated
# as the stack cost of "main", plus the maximum stack cost of any
# interrupt handler which might execute.
use strict;
# Configuration: set these as appropriate for your architecture/project.
my $objdump = "arm-none-eabi-objdump";
my $call_cost = 4;
# First, we need to read all object and corresponding .su files. We're
# gathering a mapping of functions to callees and functions to frame
# sizes. We're just parsing at this stage -- callee name resolution
# comes later.
my %frame_size; # "func@file" -> size
my %call_graph; # "func@file" -> {callees}
my %addresses; # "addr@file" -> "func@file"
my %global_name; # "func" -> "func@file"
my %ambiguous; # "func" -> 1
foreach (@ARGV) {
# Disassemble this object file to obtain a callees. Sources in the
# call graph are named "func@file". Targets in the call graph are
# named either "offset@file" or "funcname". We also keep a list of
# the addresses and names of each function we encounter.
my $objfile = $_;
my $source;
open(DISASSEMBLY, "$objdump -dr $objfile|") ||
die "Can't disassemble $objfile";
while (<DISASSEMBLY>) {
chomp;
if (/^([0-9a-fA-F]+) <(.*)>:/) {
my $a = $1;
my $name = $2;
$source = "$name\@$objfile";
$call_graph{$source} = {};
$ambiguous{$name} = 1 if defined($global_name{$name});
$global_name{$name} = "$name\@$objfile";
$a =~ s/^0*//;
$addresses{"$a\@$objfile"} = "$name\@$objfile";
}
if (/: R_[A-Za-z0-9_]+_CALL[ \t]+(.*)/) {
my $t = $1;
if ($t eq ".text") {
$t = "\@$objfile";
} elsif ($t =~ /^\.text\+0x(.*)$/) {
$t = "$1\@$objfile";
}
$call_graph{$source}->{$t} = 1;
}
}
close(DISASSEMBLY);
# Extract frame sizes from the corresponding .su file.
if ($objfile =~ /^(.*).obj$/) {
my $sufile = "$1.su";
open(SUFILE, "<$sufile") || die "Can't open $sufile";
while (<SUFILE>) {
$frame_size{"$1\@$objfile"} = $2 + $call_cost
if /^.*:([^\t ]+)[ \t]+([0-9]+)/;
}
close(SUFILE);
}
}
# In this step, we enumerate each list of callees in the call graph and
# try to resolve the symbols. We omit ones we can't resolve, but keep a
# set of them anyway.
my %unresolved;
foreach (keys %call_graph) {
my $from = $_;
my $callees = $call_graph{$from};
my %resolved;
foreach (keys %$callees) {
my $t = $_;
if (defined($addresses{$t})) {
$resolved{$addresses{$t}} = 1;
} elsif (defined($global_name{$t})) {
$resolved{$global_name{$t}} = 1;
warn "Ambiguous resolution: $t" if defined ($ambiguous{$t});
} elsif (defined($call_graph{$t})) {
$resolved{$t} = 1;
} else {
$unresolved{$t} = 1;
}
}
$call_graph{$from} = \%resolved;
}
# Create fake edges and nodes to account for dynamic behaviour.
$call_graph{"INTERRUPT"} = {};
foreach (keys %call_graph) {
$call_graph{"INTERRUPT"}->{$_} = 1 if /^__vector_/;
}
# Trace the call graph and calculate, for each function:
#
# - inherited frames: maximum inherited frame of callees, plus own
# frame size.
# - height: maximum height of callees, plus one.
# - recursion: is the function called recursively (including indirect
# recursion)?
my %has_caller;
my %visited;
my %total_cost;
my %call_depth;
sub trace {
my $f = shift;
if ($visited{$f}) {
$visited{$f} = "R" if $visited{$f} eq "?";
return;
}
$visited{$f} = "?";
my $max_depth = 0;
my $max_frame = 0;
my $targets = $call_graph{$f} || die "Unknown function: $f";
if (defined($targets)) {
foreach (keys %$targets) {
my $t = $_;
$has_caller{$t} = 1;
trace($t);
my $is = $total_cost{$t};
my $d = $call_depth{$t};
$max_frame = $is if $is > $max_frame;
$max_depth = $d if $d > $max_depth;
}
}
$call_depth{$f} = $max_depth + 1;
$total_cost{$f} = $max_frame + ($frame_size{$f} || 0);
$visited{$f} = " " if $visited{$f} eq "?";
}
foreach (keys %call_graph) { trace $_; }
# Now, print results in a nice table.
printf " %-30s %8s %8s %8s\n",
"Func", "Cost", "Frame", "Height";
print "------------------------------------";
print "------------------------------------\n";
my $max_iv = 0;
my $main = 0;
foreach (sort { $total_cost{$b} <=> $total_cost{$a} } keys %visited) {
my $name = $_;
if (/^(.*)@(.*)$/) {
$name = $1 unless $ambiguous{$name};
}
my $tag = $visited{$_};
my $cost = $total_cost{$_};
$name = $_ if $ambiguous{$name};
$tag = ">" unless $has_caller{$_};
if (/^__vector_/) {
$max_iv = $cost if $cost > $max_iv;
} elsif (/^main@/) {
$main = $cost;
}
if ($ambiguous{$name}) { $name = $_; }
printf "%s %-30s %8d %8d %8d\n", $tag, $name, $cost,
$frame_size{$_} || 0, $call_depth{$_};
}
print "\n";
print "Peak execution estimate (main + worst-case IV):\n";
printf " main = %d, worst IV = %d, total = %d\n",
$total_cost{$global_name{"main"}},
$total_cost{"INTERRUPT"},
$total_cost{$global_name{"main"}} + $total_cost{"INTERRUPT"};
print "\n";
print "The following functions were not resolved:\n";
foreach (keys %unresolved) { print " $_\n"; }

154
Tools/stack_usage/checkstack.pl Executable file
View File

@ -0,0 +1,154 @@
#!/usr/bin/perl
# Stolen from Linux kernel :)
# borrowed from busybox and modified by:
# Maciek Borzecki <maciek.borzecki@gmail.com>
# Check the stack usage of functions
#
# Copyright Joern Engel <joern@wh.fh-wedel.de>
# Inspired by Linus Torvalds
# Original idea maybe from Keith Owens
# s390 port and big speedup by Arnd Bergmann <arnd@bergmann-dalldorf.de>
# Mips port by Juan Quintela <quintela@mandrakesoft.com>
# IA64 port via Andreas Dilger
# Arm port by Holger Schurig
# sh64 port by Paul Mundt
# Random bits by Matt Mackall <mpm@selenic.com>
# M68k port by Geert Uytterhoeven and Andreas Schwab
#
# Usage:
# objdump -d vmlinux | checkstack.pl [arch] [threshold]
#
# TODO : Port to all architectures (one regex per arch)
# modifications to version from busybox:
# 1. fix regex for ARM sp, Rd may not be included
# 2. add threshold parameter
# check for arch
#
# $re is used for two matches:
# $& (whole re) matches the complete objdump line with the stack growth
# $1 (first bracket) matches the size of the stack growth
#
# use anything else and feel the pain ;)
my (@stack, $re, $x, $xs, $thresh);
{
my $arch = shift;
if ($arch eq "") {
$arch = `uname -m`;
}
$thresh = shift;
if ($thresh eq "") {
$thresh = 100;
}
$x = "[0-9a-f]"; # hex character
$xs = "[0-9a-f ]"; # hex character or space
if ($arch eq 'arm') {
#c0008ffc: e24dd064 sub sp, sp, #100 ; 0x64
# or without Rd
#800362e: b082 sub sp, #8
$re = qr/.*sub.*(?:sp, )?sp, #([0-9]+)/o;
} elsif ($arch eq 'blackfin') {
# 52: 00 e8 03 00 LINK 0xc;
$re = qr/.*LINK (0x$x{1,5});$/o;
} elsif ($arch =~ /^i[3456]86$/) {
#c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
$re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o;
} elsif ($arch eq 'x86_64') {
# 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
$re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
} elsif ($arch eq 'ia64') {
#e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
$re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
} elsif ($arch eq 'm68k') {
# 2b6c: 4e56 fb70 linkw %fp,#-1168
# 1df770: defc ffe4 addaw #-28,%sp
$re = qr/.*(?:linkw %fp,|addaw )#-([0-9]{1,4})(?:,%sp)?$/o;
} elsif ($arch eq 'mips64') {
#8800402c: 67bdfff0 daddiu sp,sp,-16
$re = qr/.*daddiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
} elsif ($arch eq 'mips') {
#88003254: 27bdffe0 addiu sp,sp,-32
$re = qr/.*addiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
} elsif ($arch eq 'ppc') {
#c00029f4: 94 21 ff 30 stwu r1,-208(r1)
$re = qr/.*stwu.*r1,-($x{1,8})\(r1\)/o;
} elsif ($arch eq 'ppc64') {
#XXX
$re = qr/.*stdu.*r1,-($x{1,8})\(r1\)/o;
} elsif ($arch eq 'powerpc') {
$re = qr/.*st[dw]u.*r1,-($x{1,8})\(r1\)/o;
} elsif ($arch =~ /^s390x?$/) {
# 11160: a7 fb ff 60 aghi %r15,-160
$re = qr/.*ag?hi.*\%r15,-(([0-9]{2}|[3-9])[0-9]{2})/o;
} elsif ($arch =~ /^sh64$/) {
#XXX: we only check for the immediate case presently,
# though we will want to check for the movi/sub
# pair for larger users. -- PFM.
#a00048e0: d4fc40f0 addi.l r15,-240,r15
$re = qr/.*addi\.l.*r15,-(([0-9]{2}|[3-9])[0-9]{2}),r15/o;
} else {
print("wrong or unknown architecture\n");
exit
}
}
sub bysize($) {
my ($asize, $bsize);
($asize = $a) =~ s/.*: *(.*)$/$1/;
($bsize = $b) =~ s/.*: *(.*)$/$1/;
$bsize <=> $asize
}
#
# main()
#
my $funcre = qr/^$x* <(.*)>:$/;
my $func;
my $file, $lastslash;
while (my $line = <STDIN>) {
if ($line =~ m/$funcre/) {
$func = $1;
}
elsif ($line =~ m/(.*):\s*file format/) {
$file = $1;
$file =~ s/\.ko//;
$lastslash = rindex($file, "/");
if ($lastslash != -1) {
$file = substr($file, $lastslash + 1);
}
}
elsif ($line =~ m/$re/) {
my $size = $1;
$size = hex($size) if ($size =~ /^0x/);
if ($size > 0xf0000000) {
$size = - $size;
$size += 0x80000000;
$size += 0x80000000;
}
next if ($size > 0x10000000);
next if $line !~ m/^($xs*)/;
my $addr = $1;
$addr =~ s/ /0/g;
$addr = "0x$addr";
# bbox: was: my $intro = "$addr $func [$file]:";
my $intro = "$func [$file]:";
my $padlen = 56 - length($intro);
while ($padlen > 0) {
$intro .= ' ';
$padlen -= 8;
}
next if ($size < $thresh);
push @stack, "$intro$size\n";
}
}
print sort bysize @stack;

View File

@ -573,10 +573,12 @@ function(px4_os_add_flags)
set(added_c_flags
-nodefaultlibs
-nostdlib
-fstack-usage
)
set(added_cxx_flags
-nodefaultlibs
-nostdlib
-fstack-usage
)
set(added_optimization_flags)