From ceeae7587eba6e293f3d9811136fd38747a8ba13 Mon Sep 17 00:00:00 2001
From: Julien Lecoeur
Date: Sun, 10 Sep 2017 19:34:34 +0200
Subject: [PATCH] worst case analysis of stack usage (#7883)

* Makefile target "check_stack"
---
 Makefile                         |  15 ++
 Tools/stack_usage/avstack.pl     | 251 +++++++++++++++++++++++++++++++
 Tools/stack_usage/checkstack.pl  | 154 +++++++++++++++++++
 cmake/nuttx/px4_impl_nuttx.cmake |   2 +
 4 files changed, 422 insertions(+)
 create mode 100755 Tools/stack_usage/avstack.pl
 create mode 100755 Tools/stack_usage/checkstack.pl

diff --git a/Makefile b/Makefile
index a1476c0ceb..b063130c01 100644
--- a/Makefile
+++ b/Makefile
@@ -363,6 +363,21 @@ cppcheck: posix_sitl_default
 	@cppcheck -i$(SRC_DIR)/src/examples --std=c++11 --std=c99 --std=posix --project=build_posix_sitl_default/compile_commands.json --xml-version=2 2> cppcheck-result.xml
 	@cppcheck-htmlreport --source-encoding=ascii --file=cppcheck-result.xml --report-dir=cppcheck --source-dir=$(SRC_DIR)/src/
 
+check_stack: px4fmu-v3_default
+	@echo "Checking worst case stack usage with avstack.pl ..."
+	@echo " "
+	@cd build_px4fmu-v3_default/ && mkdir -p stack_usage && $(SRC_DIR)/Tools/stack_usage/avstack.pl `find . -name '*.obj'` > stack_usage/avstack_output.txt 2> stack_usage/avstack_errors.txt
+	@head -n 10 build_px4fmu-v3_default/stack_usage/avstack_output.txt | c++filt
+	@echo " "
+	@echo "Checking worst case stack usage with checkstack.pl ..."
+	@echo " "
+	@echo "Top 10:"
+	@cd build_px4fmu-v3_default/ && mkdir -p stack_usage && arm-none-eabi-objdump -d src/firmware/nuttx/firmware_nuttx | $(SRC_DIR)/Tools/stack_usage/checkstack.pl arm 0 > stack_usage/checkstack_output.txt 2> stack_usage/checkstack_errors.txt
+	@head -n 10 build_px4fmu-v3_default/stack_usage/checkstack_output.txt | c++filt
+	@echo " "
+	@echo "Symbols with 'main', 'thread' or 'task':"
+	@cat build_px4fmu-v3_default/stack_usage/checkstack_output.txt | c++filt | grep -E 'thread|main|task'
+
 # Cleanup
 # --------------------------------------------------------------------
 .PHONY: clean submodulesclean distclean
diff --git a/Tools/stack_usage/avstack.pl b/Tools/stack_usage/avstack.pl
new file mode 100755
index 0000000000..5af499b145
--- /dev/null
+++ b/Tools/stack_usage/avstack.pl
@@ -0,0 +1,251 @@
+#!/usr/bin/perl -w
+# avstack.pl: AVR stack checker
+# Copyright (C) 2013 Daniel Beer
+#
+# Permission to use, copy, modify, and/or distribute this software for
+# any purpose with or without fee is hereby granted, provided that the
+# above copyright notice and this permission notice appear in all
+# copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+# PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+#
+# Usage
+# -----
+#
+# This script requires that you compile your code with -fstack-usage.
+# This results in GCC generating a .su file for each .o file. Once you
+# have these, do:
+#
+#    ./avstack.pl <object files>
+#
+# This will disassemble .o files to construct a call graph, and read
+# frame size information from .su.
The call graph is traced to find, for
+# each function:
+#
+#    - Call height: the maximum call height of any callee, plus 1
+#      (defined to be 1 for any function which has no callees).
+#
+#    - Inherited frame: the maximum *inherited* frame of any callee, plus
+#      the GCC-calculated frame size of the function in question.
+#
+# Using these two pieces of information, we calculate a cost (estimated
+# peak stack usage) for calling the function. Functions are then listed
+# on stdout in decreasing order of cost.
+#
+# Functions which are recursive are marked with an 'R' to the left of
+# them. Their cost is calculated for a single level of recursion.
+#
+# The peak stack usage of your entire program can usually be estimated
+# as the stack cost of "main", plus the maximum stack cost of any
+# interrupt handler which might execute.
+
+use strict;
+
+# Configuration: set these as appropriate for your architecture/project.
+
+my $objdump = "arm-none-eabi-objdump";  # disassembler run on every object file
+my $call_cost = 4;                      # added to every frame size read from .su
+
+# First, we need to read all object and corresponding .su files. We're
+# gathering a mapping of functions to callees and functions to frame
+# sizes. We're just parsing at this stage -- callee name resolution
+# comes later.
+
+my %frame_size;     # "func@file" -> size
+my %call_graph;     # "func@file" -> {callees}
+my %addresses;      # "addr@file" -> "func@file"
+
+my %global_name;    # "func" -> "func@file"
+my %ambiguous;      # "func" -> 1
+
+foreach my $objfile (@ARGV) {
+    # Disassemble this object file to obtain a callees. Sources in the
+    # call graph are named "func@file". Targets in the call graph are
+    # named either "offset@file" or "funcname". We also keep a list of
+    # the addresses and names of each function we encounter.
+    my $source;     # "func@file" of the function currently being read
+
+    # List-form pipe open: the object file name is handed to objdump as
+    # a single argument and is never interpreted by a shell.
+    open(my $disassembly, "-|", $objdump, "-dr", $objfile) ||
+        die "Can't disassemble $objfile";
+    while (<$disassembly>) {
+        chomp;
+
+        # Function label line: "<hex address> <name>:".
+        if (/^([0-9a-fA-F]+) <(.*)>:/) {
+            my $addr = $1;
+            my $name = $2;
+
+            $source = "$name\@$objfile";
+            $call_graph{$source} = {};
+            $ambiguous{$name} = 1 if defined($global_name{$name});
+            $global_name{$name} = "$name\@$objfile";
+
+            # Strip leading zeros so the key has the same form as the
+            # ".text+0x<offset>" relocation targets handled below
+            # (address 0 becomes the empty string, matching plain ".text").
+            $addr =~ s/^0*//;
+            $addresses{"$addr\@$objfile"} = "$name\@$objfile";
+        }
+
+        # Call relocation line: record the target as a callee of the
+        # function we are currently inside.
+        if (/: R_[A-Za-z0-9_]+_CALL[ \t]+(.*)/) {
+            my $t = $1;
+
+            if ($t eq ".text") {
+                $t = "\@$objfile";
+            } elsif ($t =~ /^\.text\+0x(.*)$/) {
+                $t = "$1\@$objfile";
+            }
+
+            # A relocation seen before any function label has no caller
+            # to attach to; skip it instead of using an undefined key.
+            $call_graph{$source}->{$t} = 1 if defined($source);
+        }
+    }
+    # close() on a pipe reports a failing child; keep going, but say so.
+    close($disassembly) ||
+        warn "$objdump reported an error for $objfile\n";
+
+    # Extract frame sizes from the corresponding .su file.
+    if ($objfile =~ /^(.*)\.obj$/) {
+        my $sufile = "$1.su";
+
+        open(my $su, "<", $sufile) || die "Can't open $sufile";
+        while (<$su>) {
+            # Function name = text after the last ':' up to the first
+            # blank; frame size = the number that follows.
+            $frame_size{"$1\@$objfile"} = $2 + $call_cost
+                if /^.*:([^\t ]+)[ \t]+([0-9]+)/;
+        }
+        close($su);
+    }
+}
+
+# In this step, we enumerate each list of callees in the call graph and
+# try to resolve the symbols. We omit ones we can't resolve, but keep a
+# set of them anyway.
+
+my %unresolved;     # callee name -> 1 for every target that matched nothing
+
+foreach my $from (keys %call_graph) {
+    my $callees = $call_graph{$from};
+    my %resolved;
+
+    foreach my $t (keys %$callees) {
+        if (defined($addresses{$t})) {
+            # "offset@file": a local call by address.
+            $resolved{$addresses{$t}} = 1;
+        } elsif (defined($global_name{$t})) {
+            # Bare symbol name: take the last definition seen.
+            $resolved{$global_name{$t}} = 1;
+            warn "Ambiguous resolution: $t" if defined ($ambiguous{$t});
+        } elsif (defined($call_graph{$t})) {
+            # Already a "func@file" key.
+            $resolved{$t} = 1;
+        } else {
+            $unresolved{$t} = 1;
+        }
+    }
+
+    $call_graph{$from} = \%resolved;
+}
+
+# Create fake edges and nodes to account for dynamic behaviour.
+# "INTERRUPT" is a synthetic caller of every function whose key starts
+# with "__vector_", so its cost is the worst case over those handlers.
+$call_graph{"INTERRUPT"} = {};
+
+foreach (keys %call_graph) {
+    $call_graph{"INTERRUPT"}->{$_} = 1 if /^__vector_/;
+}
+
+# Trace the call graph and calculate, for each function:
+#
+#    - inherited frames: maximum inherited frame of callees, plus own
+#      frame size.
+#    - height: maximum height of callees, plus one.
+#    - recursion: is the function called recursively (including indirect
+#      recursion)?
+
+my %has_caller;     # "func@file" -> 1 if some traced function calls it
+my %visited;        # "func@file" -> "?" in progress, "R" recursive, " " done
+my %total_cost;     # "func@file" -> own frame + largest callee total
+my %call_depth;     # "func@file" -> 1 + largest callee depth
+
+# trace(FUNC): depth-first walk from FUNC (a %call_graph key) that fills
+# %visited, %has_caller, %total_cost and %call_depth for FUNC and for
+# everything reachable from it. Dies if FUNC is not in %call_graph.
+sub trace {
+    my $f = shift;
+
+    if ($visited{$f}) {
+        # Reaching a node that is still in progress means we found a cycle.
+        $visited{$f} = "R" if $visited{$f} eq "?";
+        return;
+    }
+
+    $visited{$f} = "?";
+
+    my $max_depth = 0;
+    my $max_frame = 0;
+
+    my $targets = $call_graph{$f} || die "Unknown function: $f";
+    foreach my $t (keys %$targets) {
+        $has_caller{$t} = 1;
+        trace($t);
+
+        # A callee that is still being traced (recursion) has no totals
+        # yet; count it as 0 rather than comparing against undef.
+        my $is = $total_cost{$t} || 0;
+        my $d = $call_depth{$t} || 0;
+
+        $max_frame = $is if $is > $max_frame;
+        $max_depth = $d if $d > $max_depth;
+    }
+
+    $call_depth{$f} = $max_depth + 1;
+    $total_cost{$f} = $max_frame + ($frame_size{$f} || 0);
+    $visited{$f} = " " if $visited{$f} eq "?";
+}
+
+foreach (keys %call_graph) { trace($_); }
+
+# Now, print results in a nice table.
+# The two leading spaces stand in for the one-character tag column and
+# its separator, so the titles line up with the rows printed below.
+printf "  %-30s %8s %8s %8s\n",
+    "Func", "Cost", "Frame", "Height";
+print "------------------------------------";
+print "------------------------------------\n";
+
+foreach (sort { $total_cost{$b} <=> $total_cost{$a} } keys %visited) {
+    # Show the bare function name unless that name is defined in more
+    # than one object file; in that case keep the full "func@file" key.
+    my $name = $_;
+    if (/^(.*)@(.*)$/) {
+        $name = $1 unless $ambiguous{$1};
+    }
+
+    # Tag column: " " plain, "R" recursive, ">" not called by any traced
+    # function.
+    my $tag = $has_caller{$_} ? $visited{$_} : ">";
+
+    printf "%s %-30s %8d %8d %8d\n", $tag, $name, $total_cost{$_},
+        $frame_size{$_} || 0, $call_depth{$_};
+}
+
+print "\n";
+
+# "main" may be absent from the analysed objects; report 0 in that case
+# instead of indexing %total_cost with an undefined key.
+my $main_key = $global_name{"main"};
+my $main_cost = defined($main_key) ? ($total_cost{$main_key} || 0) : 0;
+my $iv_cost = $total_cost{"INTERRUPT"} || 0;
+
+print "Peak execution estimate (main + worst-case IV):\n";
+printf " main = %d, worst IV = %d, total = %d\n",
+    $main_cost, $iv_cost, $main_cost + $iv_cost;
+
+print "\n";
+
+print "The following functions were not resolved:\n";
+foreach (keys %unresolved) { print " $_\n"; }
diff --git a/Tools/stack_usage/checkstack.pl b/Tools/stack_usage/checkstack.pl
new file mode 100755
index 0000000000..b09efc362b
--- /dev/null
+++ b/Tools/stack_usage/checkstack.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/perl
+
+# Stolen from Linux kernel :)
+# borrowed from busybox and modified by:
+# Maciek Borzecki
+
+# Check the stack usage of functions
+#
+# Copyright Joern Engel
+# Inspired by Linus Torvalds
+# Original idea maybe from Keith Owens
+# s390 port and big speedup by Arnd Bergmann
+# Mips port by Juan Quintela
+# IA64 port via Andreas Dilger
+# Arm port by Holger Schurig
+# sh64 port by Paul Mundt
+# Random bits by Matt Mackall
+# M68k port by Geert Uytterhoeven and Andreas Schwab
+#
+# Usage:
+# objdump -d vmlinux | checkstack.pl [arch] [threshold]
+#
+# TODO : Port to all architectures (one regex per arch)
+
+# modifications to version from busybox:
+# 1. 
fix regex for ARM sp, Rd may not be included
+# 2. add threshold parameter
+
+# check for arch
+#
+# $re is used for two matches:
+# $& (whole re) matches the complete objdump line with the stack growth
+# $1 (first bracket) matches the size of the stack growth
+#
+# use anything else and feel the pain ;)
+my (@stack, $re, $x, $xs, $thresh);
+{
+    # First argument: architecture. Fall back to the host's `uname -m`
+    # when it is missing or empty; chomp so the result can match the
+    # string comparisons below.
+    my $arch = shift;
+    if (!defined($arch) || $arch eq "") {
+        $arch = `uname -m`;
+        chomp($arch);
+    }
+
+    # Second argument: minimum frame size to report (default 100).
+    $thresh = shift;
+    if (!defined($thresh) || $thresh eq "") {
+        $thresh = 100;
+    }
+
+    $x = "[0-9a-f]";    # hex character
+    $xs = "[0-9a-f ]";  # hex character or space
+    if ($arch eq 'arm') {
+        #c0008ffc: e24dd064 sub sp, sp, #100 ; 0x64
+        # or without Rd
+        #800362e: b082 sub sp, #8
+        $re = qr/.*sub.*(?:sp, )?sp, #([0-9]+)/o;
+    } elsif ($arch eq 'blackfin') {
+        # 52: 00 e8 03 00 LINK 0xc;
+        $re = qr/.*LINK (0x$x{1,5});$/o;
+    } elsif ($arch =~ /^i[3456]86$/) {
+        #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
+        $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o;
+    } elsif ($arch eq 'x86_64') {
+        # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
+        $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
+    } elsif ($arch eq 'ia64') {
+        #e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
+        $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
+    } elsif ($arch eq 'm68k') {
+        # 2b6c: 4e56 fb70 linkw %fp,#-1168
+        # 1df770: defc ffe4 addaw #-28,%sp
+        $re = qr/.*(?:linkw %fp,|addaw )#-([0-9]{1,4})(?:,%sp)?$/o;
+    } elsif ($arch eq 'mips64') {
+        #8800402c: 67bdfff0 daddiu sp,sp,-16
+        $re = qr/.*daddiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
+    } elsif ($arch eq 'mips') {
+        #88003254: 27bdffe0 addiu sp,sp,-32
+        $re = qr/.*addiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
+    } elsif ($arch eq 'ppc') {
+        #c00029f4: 94 21 ff 30 stwu r1,-208(r1)
+        $re = qr/.*stwu.*r1,-($x{1,8})\(r1\)/o;
+    } elsif ($arch eq 'ppc64') {
+        #XXX
+        $re = qr/.*stdu.*r1,-($x{1,8})\(r1\)/o;
+    } elsif ($arch eq 'powerpc') {
+        $re = qr/.*st[dw]u.*r1,-($x{1,8})\(r1\)/o;
+    } elsif ($arch =~ /^s390x?$/) {
+        # 11160: a7 fb ff 60 aghi %r15,-160
+        $re = qr/.*ag?hi.*\%r15,-(([0-9]{2}|[3-9])[0-9]{2})/o;
+    } elsif ($arch =~ /^sh64$/) {
+        #XXX: we only check for the immediate case presently,
+        # though we will want to check for the movi/sub
+        # pair for larger users. -- PFM.
+        #a00048e0: d4fc40f0 addi.l r15,-240,r15
+        $re = qr/.*addi\.l.*r15,-(([0-9]{2}|[3-9])[0-9]{2}),r15/o;
+    } else {
+        print("wrong or unknown architecture\n");
+        exit
+    }
+}
+
+# Sort comparator: orders report lines by the size that follows the
+# last ':' in each line, largest first. Reads sort's $a and $b, so it
+# takes no arguments and carries no prototype.
+sub bysize {
+    my ($asize, $bsize);
+    ($asize = $a) =~ s/.*:[ \t]*(.*)$/$1/;
+    ($bsize = $b) =~ s/.*:[ \t]*(.*)$/$1/;
+    $bsize <=> $asize
+}
+
+#
+# main()
+#
+my $funcre = qr/^$x* <(.*)>:$/;
+my $func = "";  # name from the most recent "<name>:" label line
+my $file = "";  # base name from the most recent "file format" line
+
+while (my $line = <STDIN>) {
+    if ($line =~ m/$funcre/) {
+        $func = $1;
+    }
+    elsif ($line =~ m/(.*):\s*file format/) {
+        # Keep only the file's base name, without a ".ko" suffix.
+        $file = $1;
+        $file =~ s/\.ko//;
+        my $lastslash = rindex($file, "/");
+        if ($lastslash != -1) {
+            $file = substr($file, $lastslash + 1);
+        }
+    }
+    elsif ($line =~ m/$re/) {
+        my $size = $1;
+        $size = hex($size) if ($size =~ /^0x/);
+
+        # Values above 0xf0000000 are treated as negative 32-bit
+        # numbers and folded back to their magnitude.
+        if ($size > 0xf0000000) {
+            $size = - $size;
+            $size += 0x80000000;
+            $size += 0x80000000;
+        }
+        next if ($size > 0x10000000);
+        next if ($size < $thresh);
+
+        # bbox: the address is no longer part of the line.
+        my $intro = "$func [$file]:";
+        # Pad with tabs; each one accounts for 8 of the 56 columns
+        # reserved for the name.
+        my $padlen = 56 - length($intro);
+        while ($padlen > 0) {
+            $intro .= "\t";
+            $padlen -= 8;
+        }
+        push @stack, "$intro$size\n";
+    }
+}
+
+print sort bysize @stack;
diff --git a/cmake/nuttx/px4_impl_nuttx.cmake b/cmake/nuttx/px4_impl_nuttx.cmake
index d9b943c743..71616272f1 100644
--- a/cmake/nuttx/px4_impl_nuttx.cmake
+++ b/cmake/nuttx/px4_impl_nuttx.cmake
@@ -573,10 +573,12 @@ function(px4_os_add_flags)
 	set(added_c_flags
 		-nodefaultlibs
 		-nostdlib
+		-fstack-usage
 		)
 	set(added_cxx_flags
 		-nodefaultlibs
 		-nostdlib
+		-fstack-usage
 		)
 	set(added_optimization_flags)