Dear developer colleagues,
I have been asked to post the code fragment that I use to generate a
stack backtrace for SIGSEGV etc. - here it is.
The need to write such code arose because we do our own multithreading,
and the debuggers for AIX (supplied by IBM) would fail when analyzing
such a 'core' file ('adb' would even write its own core, destroying
what it should analyze). So we avoided using the debuggers and wrote
the addresses of the backtrace into a file before really crashing.
It was important that it works on a customer machine with just
standard AIX, no additional software!
Below, you will find three parts:
1) a C code fragment that follows the chain of stack frames backwards
from the point of failure towards 'main'; it writes each address
as a hex value to a diagnostic file;
2) a piece of assembler code that helps to check the validity of the
start point of that chain (the first address must be numerically
greater than the current stack address);
3) some shell script code that analyses the file written by the C code
and merges those (numeric) addresses with the namelist.
I will try to answer any emerging questions as best I can.
Joerg Bruehe, SQL Datenbanksysteme, Berlin (Germany)
email: jo...@sql.de
=-=-=-=-= cut here =-=-=-=-=-= the C code following the chain: =-=-=-=-=
/* imported functions */
#ifdef _IBMR2
/* Assembler helper: returns the current stack pointer (GPR 1) of its caller. */
extern struct STACK_TYPE * e71a_own_frame ();
#endif
/* local functions */
#ifdef _IBMR2
/* Handler that main() installs for SIGILL, SIGFPE, SIGBUS and SIGSEGV. */
static SIGNALFUNCTYPE e81_core_handler ();
#endif
/* ===== */
/*
 * Program entry point (K&R-style definition).
 * The part shown here only sets up the signal dispositions:
 *   - SIGHUP, SIGINT, SIGSYS (not on LINUX), SIGPIPE and, where defined,
 *     SIGWINCH are ignored;
 *   - SIGFPE, SIGBUS and SIGSEGV get their default action;
 *   - on _IBMR2, SIGILL, SIGFPE, SIGBUS and SIGSEGV are then routed to
 *     e81_core_handler, which replaces the default action set just before.
 */
int main ( argc , argv )
int argc ;
char ** argv ;
{
#ifdef _IBMR2
/* one handler description, reused for every fatal signal below */
struct sigaction force_full_dump ;
#endif
/*
* Initialize signal handling.
* These should not stop the program.
*/
(void) signal ( SIGHUP , SIG_IGN );
(void) signal ( SIGINT , SIG_IGN );
#ifndef LINUX
(void) signal ( SIGSYS , SIG_IGN );
#endif
(void) signal ( SIGPIPE , SIG_IGN );
#ifdef SIGWINCH
(void) signal ( SIGWINCH , SIG_IGN );
#endif
/*
* These should crash the program.
*/
(void) signal ( SIGFPE , SIG_DFL );
(void) signal ( SIGBUS , SIG_DFL );
(void) signal ( SIGSEGV , SIG_DFL );
#ifdef _IBMR2
/* On _IBMR2 the fatal signals go through the backtrace handler first. */
force_full_dump.sa_handler = e81_core_handler ;
SIGINITSET ( force_full_dump.sa_mask );
force_full_dump.sa_flags = SA_FULLDUMP | SA_OLDSTYLE ;
/* 'OLDSTYLE' is needed so that the handler may kill the process:
* the signal is back at SIG_DFL when the handler is entered, so the
* handler can reissue it to terminate the process.
*/
/* Return values are deliberately discarded (cast to void). */
(void) sigaction ( SIGILL , &force_full_dump , NULL );
(void) sigaction ( SIGFPE , &force_full_dump , NULL );
(void) sigaction ( SIGBUS , &force_full_dump , NULL );
(void) sigaction ( SIGSEGV , &force_full_dump , NULL );
#endif
/*
* Code deleted ... Here follows the real work of the program ...
*/
.....
/*
 * Handler for the fatal signals (installed by main() on _IBMR2).
 *
 * It can be used to write the stack backtrace of a dying process into the
 * diag file to evade debugger incompetencies (needed on RS/6000), and to do
 * any other operations while the process is still alive, e.g. to move to
 * another directory to save the core file (inactive).
 *
 * The backtrace is emitted with printf, i.e. on stdout (presumably stdout
 * is the diag file -- confirm against the code that opens it).
 *
 * sig   - number of the signal being handled
 * dummy - not used
 * SCP   - saved context of the interrupted code: GPR 1 is its stack
 *         pointer, 'iar' its instruction address
 */
#ifdef _IBMR2
static SIGNALFUNCTYPE e81_core_handler ( sig , dummy , SCP )
int sig , dummy ;
struct sigcontext * SCP ;
{
    struct STACK_TYPE *that_frame ;   /* frame currently being reported  */
    struct STACK_TYPE *prev_frame ;   /* frame reported one step earlier */

    printf ( "e81_core_handler: ABORTING due to signal %d \n", sig );

    /*
     * Lower bound for the walk: the handler's own stack pointer.
     * The start of the chain is the stack pointer saved at the point of
     * failure; it is only followed if it lies above that bound.
     */
    prev_frame = e71a_own_frame () ;
    that_frame = (struct STACK_TYPE *) SCP->sc_jmpbuf.jmp_context.gpr [ 1 ] ;

    printf ( "e81_core_handler: current inst.addr.reg 0x%08lx\n",
             SCP->sc_jmpbuf.jmp_context.iar );   /* 'iar' saved */

    /*
     * The stack grows from high to low addresses, so every caller's frame
     * must sit at a strictly higher address than its callee's.  The walk
     * stops as soon as the chain no longer ascends (end of chain or a
     * corrupted link).
     */
    while ( that_frame > prev_frame )
    {
        printf ( "e81_core_handler: called from code addr 0x%08lx\n",
                 that_frame->lr );   /* 'link register' saved */
        prev_frame = that_frame ;
        that_frame = (struct STACK_TYPE *) that_frame->sp ; /* previous level */
    }

    /*
     * Push the buffered backtrace out now: the signals sent below end the
     * process without the normal stdio cleanup, so anything still sitting
     * in the stdout buffer would be lost.
     */
    (void) fflush ( stdout );

    /*
     * Signal is reset to SIG_DFL on entry of this function ('SA_OLDSTYLE').
     * The following reissues the signal and creates a core.
     * SIGIOT should force a core if 'sig' is ignored (_IBMR2),
     * SIGKILL is a last resort to ensure termination.
     */
    (void) kill ( getpid() , sig );
    (void) kill ( getpid() , SIGIOT );
    (void) kill ( getpid() , SIGKILL );
    pause ();
    /*NOTREACHED*/
...
=-=-=-=-= cut here =-=-=-=-=-= the assembler code giving the start
point: =-=-=-=-=
#-----------------------------------------------------------------------
# e71a_own_frame -- hand the caller its own stack pointer
#   C view : extern struct STACK_TYPE * e71a_own_frame ();
#   In     : nothing
#   Out    : GPR 3 (the result register) = GPR 1 (the stack pointer)
#   Note   : deliberately minimal -- the routine sets up no stack frame,
#            so GPR 1 still holds the caller's value.
#-----------------------------------------------------------------------
        .globl  .e71a_own_frame[pr]
        .csect  .e71a_own_frame[pr]
.e71a_own_frame:
        lil     3,0             # GPR 3 = 0 (start from a known value)
        a       3,3,1           # GPR 3 = 0 + GPR 1 = stack pointer
        br                      # return; the result travels in GPR 3
=-=-=-=-= cut here =-=-=-=-=-= the shell code merging with the namelist:
=-=-=-=-=
# Here comes part of the shell script to analyze that diagnostic file:
# PROG the filename of the program that crashed
# DIAGFIL the file that got the 'printf' in the C "core handler" routine
# TMP, TMP1, TMP2 temporary files
# OUT the analysis output file
###
# _check_namelist_valid
# Decides whether an existing name list file can be reused for $PROG.
#   reads : PROG      file name of the program that crashed
#   sets  : SUM       output of 'sum' for $PROG
#           NAMELIST  name list file: `basename $PROG`.nm (in the current dir)
#           FILSUM    checksum recorded in the first line of $NAMELIST
#                     (only set when $NAMELIST exists)
#           VALID     1 if a valid namelist was found, 0 otherwise
_check_namelist_valid ()
{
    SUM=`sum "$PROG"`
    NAMELIST=`basename "$PROG"`.nm
    VALID=0
    if [ -f "$NAMELIST" ]
    then    # we have a name list file for $PROG - is it valid ?
        # The first line has the form "0x00000000|... UNIX checksum: <sum>".
        # Remove that literal prefix rather than cutting at a fixed column:
        # the checksum starts at column 31, so a cut from column 32 loses
        # its first character and the comparison below could never succeed.
        # A line without the prefix stays unchanged and simply fails to match.
        FILSUM=`head -1 "$NAMELIST" | sed 's/^0x00000000|\.\.\. UNIX checksum: //'`
        if [ "$FILSUM" = "$SUM" ]
        then
            VALID=1
        fi
    fi
}
# _make_valid_namelist
# namelist missing or invalid, create new, attention: checksum format !
#   reads : PROG, SUM, NAMELIST, IAM (name used in messages),
#           TMP, TMP1, TMP2 (temporary files)
#   writes: $NAMELIST -- two header lines (checksum, 'ls -l' of $PROG),
#           followed by "address|symbol" lines derived from 'nm'
#   exits with status 9 if $NAMELIST is not writable.
_make_valid_namelist ()
{
    # Header lines carry the address 0x00000000, like every other line they
    # start with a hex address field.
    echo "0x00000000|... UNIX checksum: $SUM" > "$NAMELIST"
    echo "0x00000000|... `ls -l $PROG`" >> "$NAMELIST"
    if [ ! -w "$NAMELIST" ]
    then
        echo "$IAM : Cannot write namelist file '$NAMELIST' - ABORT"
        exit 9
    fi

    # The column positions below depend on the 'nm' output layout of the
    # respective AIX release; each branch moves the address field in front
    # of the symbol name.
    case "`uname -srv`" in
    "AIX 2 3" )
        nm -vxT "$PROG" | fgrep '.text' | cut -c1-32 > "$TMP"
        cut -c22-32 "$TMP" > "$TMP1"
        cut -c1-20 "$TMP" > "$TMP2"
        paste -d'\0' "$TMP1" "$TMP2" | uniq >> "$NAMELIST"
        rm -f "$TMP" "$TMP1" "$TMP2" ;;
    "AIX 1 4" | "AIX 2 4" )
        nm -vxT "$PROG" | grep ' [tT] ' | cut -c2-41 > "$TMP"
        # addresses are rewritten from 0x1... to 0x0...
        cut -c23-32 "$TMP" | sed 's/0x1/0x0/' > "$TMP1"
        cut -c1-20,34- "$TMP" > "$TMP2"
        paste -d'|' "$TMP1" "$TMP2" | uniq >> "$NAMELIST"
        rm -f "$TMP" "$TMP1" "$TMP2" ;;
    * )
        # No recipe for this system: the namelist keeps only its two header
        # lines.  Say so instead of leaving an empty symbol table unnoticed.
        echo "$IAM : no 'nm' recipe for '`uname -srv`' - '$NAMELIST' holds no symbols" >&2 ;;
    esac
}
# _generate_backtrace
# Appends to $OUT, for each address the core handler printed, the name list
# entries surrounding that address.
#   reads : DIAGFIL   file holding the "e81_core_handler: ..." lines
#           NAMELIST  "address|symbol" lines
#           TMP, TMP1, TMP2 (temporary files)
#   writes: OUT (appended)
_generate_backtrace ()
{
    # Keep only the handler's lines that carry a hex value and cut each one
    # down to that value.  (The redirection target has to stay on this line.)
    fgrep 'e81_core_handler' "$DIAGFIL" | fgrep ' 0x' | sed 's/^.* 0x/0x/' > "$TMP"

    # At most the first 35 lines are analysed.
    for ADDR in `head -35 "$TMP"`
    do
        echo ' ' >> "$OUT"
        case "$ADDR" in
        0x10* ) # an address in the text segment - work on it
            # same 0x1 -> 0x0 rewrite as applied to the name list addresses
            ADDR_NM=`echo $ADDR | sed 's/0x1/0x0/'`
            echo "$ADDR_NM|ADDRESS IN BACKTRACE" > "$TMP1"
            # Sorting the marker line into the name list puts it right
            # after the nearest symbol at or below the address; 'ed' then
            # prints 3 lines before through 2 lines after the marker.
            # The here-document and its terminator must start in column 1.
            sort "$NAMELIST" "$TMP1" > "$TMP2"
            ed - "$TMP2" >> "$OUT" << +
H
/ADDRESS IN BACKTRACE/-3,/ADDRESS IN BACKTRACE/+2p
q
+
            ;;
        * ) # obviously invalid
            echo ' (not in text segment, invalid stack)' >> "$OUT"
            ;;
        esac
    done
    echo ' ' >> "$OUT"
}
=-=-=-=-= cut here =-=-=-=-=-= end of code fragments =-=-=-=-=