Amended : Digital Unix 4.0 bug on DEC Alpha

Amended : Digital Unix 4.0 bug on DEC Alpha

Post by Kin Ming N » Tue, 31 Mar 1998 04:00:00



Sorry that I have attached an old version of the "handler.c" in my first
news, it has the "colouring" part missing.  The correct version is
attached in this amendment.

I share my PC and netscape with a friend.  The reply address in the
previous news is my friend's.  My address is victo...@cse.unsw.edu.au.
But messages sent to my friend will also be forwarded to me.  Sorry for
the confusion caused.

[ HANDLER.C 4K ]
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>
#include <unistd.h>

/*
 * Context snapshots that main() prints side by side, one column each.
 * They are file-scope so that the signal handler, the target thread and
 * main() can all reach them.
 */
ucontext_t mach_c1;             /* getcontext() called in the target thread  */
ucontext_t mach_c2;             /* bytes at the handler's 3rd argument       */
ucontext_t mach_c3;             /* bytes starting 0x30 below the 3rd argument */
ucontext_t mach_c4;             /* getcontext() called inside the handler    */

/* Disposition installed for SIGUSR1 by main(). */
struct sigaction s_action;

/* Identifier of the target thread; main() sends SIGUSR1 to it. */
pthread_t tid;

/*
 * Three-argument signal handler (installed by main() with SA_SIGINFO).
 *
 * Prints the signal number, the current thread and the value of the context
 * pointer, then records three views of the context into the file-scope
 * snapshots so that main() can list them next to each other:
 *
 *   mach_c2 - sizeof(ucontext_t) bytes starting at tcp;
 *   mach_c3 - sizeof(ucontext_t) bytes starting 0x30 (48) bytes below tcp;
 *   mach_c4 - the context returned by getcontext() called from the handler.
 *
 * sig  - number of the signal being delivered
 * sigp - signal information passed by the system (not used)
 * tcp  - context pointer passed by the system as the third argument
 *
 * NOTE(review): printf() is not async-signal-safe.  It is kept because this
 * is a diagnostic program and the handler only interrupts a thread spinning
 * in a plain arithmetic loop.
 */
void handler(int sig, siginfo_t *sigp, void *tcp)
{
        const char *ctx = (const char *)tcp;

        (void)sigp;

        printf("Signal handler :\n");
        printf("===========================================================");
        printf("=============\n");
        /* %lx requires an unsigned long argument, so convert explicitly. */
        printf("catches signal %d in thread 0x%lx\n",
                                         sig, (unsigned long)pthread_self());
        printf("Pointer to context is pointing at 0x%lx\n\n\n",
                                         (unsigned long)tcp);

        /* take machine context offered by signal handler */
        memcpy(&mach_c2, ctx, sizeof(ucontext_t));

        /* take machine context at an address */
        /* 48bytes lower than the signal handler suggests */
        /* (deliberately reads memory in front of the object at tcp) */
        memcpy(&mach_c3, ctx - 0x30, sizeof(ucontext_t));

        /* take machine context obtained by getcontext() from within handler*/
        getcontext(&mach_c4);
}

/*
 * Body of the target thread.
 *
 * Prints the addresses of its locals, fills the 100 long-sized slots just
 * below j with the pattern 0xaaaaaaaaaaaaaa0 + (i % 0x10), stores its context
 * in mach_c1 via getcontext(), and then spins forever so that main() can
 * deliver SIGUSR1 to it.
 *
 * The writes below &j go outside any declared object on purpose: they mark
 * memory so that later overwrites show up in main()'s listing.  Keep the
 * declaration order of t, i and j unchanged; the printed addresses and the
 * marked region depend on where the compiler places them.
 *
 * p - thread argument (not used)
 *
 * Never returns in practice; the trailing return only satisfies the
 * void * return type required of a thread start routine.
 */
void *target(void *p)
{
        long *t, i, j;

        (void)p;
        j = 0x12344321;

        printf("Target thread :\n");
        printf("===========================================================");
        printf("=============\n");
        printf("Address of local variables :\n");
        /* %lx requires unsigned long arguments, so convert the addresses. */
        printf("&t = 0x%lx\n&i = 0x%lx\n&j = 0x%lx\n\n\n",
               (unsigned long)&t, (unsigned long)&i, (unsigned long)&j);

        /* colouring memory contents preceding j */
        t = &j;
        t--;
        for (i = 0; i < 100; i++) {
          *t-- = 0xaaaaaaaaaaaaaa0 + i % 0x10;
        }

        /* take machine context obtained by getcontext() */
        getcontext(&mach_c1);

        /* busy loop: keeps the thread running until the process exits */
        while (1) {
          i = j + j;
        }

        return NULL;    /* not reached */
}

main()
{
        long *t1, *t2, *t3, *t4;
        int i;

        /* setup signal handler */
        s_action.sa_sigaction = handler;
        s_action.sa_flags = SA_SIGINFO;
        sigaction(SIGUSR1, &s_action, NULL);

        /* create the target thread */
        pthread_create(&tid, NULL, target, (void *)NULL);

        printf("Some basic information\n");
        printf("===========================================================");
        printf("=============\n");
        printf("sizeof(ucontext_t) = 0x%x\n", sizeof(ucontext_t));
        printf("sizeof(sigset_t)   = 0x%x\n", sizeof(sigset_t));
        printf("sizeof(stack_t)    = 0x%x\n", sizeof(stack_t));
        printf("sizeof(mcontext_t) = 0x%x\n\n\n", sizeof(mcontext_t));

        /* allow target thread to list context by getcontext() in thread */
        sleep(5);

        /* send the target thread a SIGUSR1 */
        pthread_kill(tid, SIGUSR1);

        /* allow target thread to progress */
        sched_yield();

        sleep(5);

        printf("Listing of machine contexts taken :\n");
        printf("===========================================================");
        printf("=============\n");
        printf("    by                by                by                ");
        printf("by\n");
        printf("    getcontext()      handler           handler           ");
        printf("getcontext\n");
        printf("                                        with shift        ");
        printf("within handler\n\n");
        printf("-----------------------------------------------------------");
        printf("-------------\n");

        printf("__XSE(sc_pc) :\n");
        printf("    0x%-16lx0x%-16lx0x%-16lx0x%-16lx\n\n",
                                     mach_c1.uc_mcontext.__XSE(sc_pc),
                                     mach_c2.uc_mcontext.__XSE(sc_pc),
                                     mach_c3.uc_mcontext.__XSE(sc_pc),
                                     mach_c4.uc_mcontext.__XSE(sc_pc));

        printf("__XSE(sc_sbase) :\n");
        printf("    0x%-16lx0x%-16lx0x%-16lx0x%-16lx\n\n",
                                     mach_c1.uc_mcontext.__XSE(sc_sbase),
                                     mach_c2.uc_mcontext.__XSE(sc_sbase),
                                     mach_c3.uc_mcontext.__XSE(sc_sbase),
                                     mach_c4.uc_mcontext.__XSE(sc_sbase));

        printf("__XSE(sc_regs)[30] :\n");
        printf("    0x%-16lx0x%-16lx0x%-16lx0x%-16lx\n\n",
                             mach_c1.uc_mcontext.__XSE(sc_regs)[30],
                             mach_c2.uc_mcontext.__XSE(sc_regs)[30],
                             mach_c3.uc_mcontext.__XSE(sc_regs)[30],
                             mach_c4.uc_mcontext.__XSE(sc_regs)[30]);

        t1 = (long *)&(mach_c1);
        t2 = (long *)&(mach_c2);
        t3 = (long *)&(mach_c3);
        t4 = (long *)&(mach_c4);

        printf("The ucontext_t data stuctures listed as array of long\n");
        for (i = 0; i < (sizeof(ucontext_t) / 8); i++)
          printf("%2d. 0x%-16lx0x%-16lx0x%-16lx0x%-16lx\n" ,
                             i, *t1++, *t2++, *t3++, *t4++);
        printf("End of listing\n");

}

[ DISCUSS.TXT 19K ]
According to the on-line help manual on the DEC Alpha machines running Digital Unix 4.0, as one setup option, when a signal handler is triggered, the 3rd argument passed by the system to it will be pointing at an object of type ucontext_t, referring to the receiving process' context that was interrupted when the signal is delivered.  It is found that the object being pointed to is of type mcontext_t, a member of the ucontext_t mentioned above, rather than ucontext_t itself.  This discrepancy can result in misinterpretation on the content of the object being pointed to.  For example, the address of the stack base shows up in the place expected to be occupied by the program counter.  

1       The 3-argument-option of the signal handler function
The sigaction function can be used to set up a signal handler function to react upon the delivery of a signal.  A sigaction structure, filled with specifications for the manner in which the signal handler behaves, is to be passed to the sigaction function as the second argument.  If the sa_flags member of the sigaction structure has the SA_SIGINFO bit cleared, the signal handler function triggered on the delivery of a signal will be passed one argument from the operating system.  If the SA_SIGINFO bit is set, the signal handler triggered will be passed three arguments instead.  The third argument will be a pointer to an object of type ucontext_t [man page : sigaction].  This object is to be used by the kernel to restore state following execution of the signal handler.  It is also made available to the signal handler to allow it to properly restore state if a non-standard exit is performed [/usr/include/machine/context.h on Digital Unix 4.0].
Execution contexts captured in the form of a ucontext_t type object are also available by a call to the getcontext function.
Accesses to execution contexts via the 3rd argument of a signal handler or the getcontext function are both used to implement checkpointing and rollback recovery of an application thread in Libra.

2       The ucontext_t structure
The ucontext_t structure, used to hold execution context, is defined in the header file /usr/include/sys/context_t.h, as:
typedef struct  ucontext {
        unsigned long           uc_flags;
        struct ucontext         *uc_link;
        sigset_t                uc_sigmask;
        stack_t                 uc_stack;
        mcontext_t              uc_mcontext;
        long                    uc_filler[5];

} ucontext_t;

The mcontext_t structure, a member of the ucontext_t structure, holds a snapshot of the CPU registers.  It is defined in /usr/include/sys/context_t.h as a sigcontext structure, while the sigcontext structure itself is defined in /usr/include/machine/context.h.

3       Discrepancy between documented and observed
The investigation into the contents actually present at the location pointed to by the 3rd argument of the signal handler was triggered by a failure: rolling back, via a signal handler, to an execution context that had been checkpointed via a call to the getcontext() function did not work.
The program handler_3rd_argument_threaded_version.c best summarizes the investigation carried out.  Its complete listing can be found in the appendix.  The sequence of events in the program is listed below:
(a).    A signal handler with 3 arguments is set up to catch the signal SIGUSR1.
(b).    A thread target is created.  It prints the addresses of its local variables, including that of its last declared local variable j, in order to provide an indication of an upper bound on the stack space available for execution context storage,  because the 3rd argument of a signal handler is found in previous experiments to be pointing at somewhere in the stack of the target thread.  Furthermore, in order to make visible the system's action of execution context storing, which is going to be triggered in (c),  it colours its not yet used stack space with a special pattern: 0xaaaaaaaaaaaaaa0-f.  That is,  a trail of a's followed by a digit recurring from 0 through f.  Then it calls the getcontext() function and have the context saved in ucontext_t variable No. 1.  Then it enters into an infinite loop.
(c).    Then a signal SIGUSR1 is sent to thread target.
(d).    Thread target catches the signal, enters into the signal handler.  The address pointed to by the 3rd argument of the signal handler is printed.  This address and the address of the last declared local variable j, enclose a region available to the ucontext_t structure in question.  We are going to compare this size available to the size of a ucontext_t structure.  Then the signal handler copies the ucontext_t structure pointed to by the 3rd argument to ucontext_t variable No.2.
(e).    A copy similar to that done in (d) is performed.  Only this time the region being copied is shifted 48 bytes towards lower addresses relative to that in (d), and the destination of the copy is ucontext_t variable No.3.
(f).    The signal handler then calls the getcontext function and has the context saved in ucontext_t variable No.4.
(g).    The signal handler returns.
(h).    A table of the contents of the ucontext_t variables is printed.
        The first column is for ucontext_t variable No.1, 2nd for No.2, and so on.
        The first three rows list the program counter, stack pointer of main thread, and stackpointer of the thread concerned, respectively.
        Then comes a listing of contents of entire ucontext_t variables, Every 8 bytes is printed as a long integer.  The boundaries separating colour remaining and colour overwritten will become visible in columns 2 and 3.
The following figure lists the output of the program.
Some basic information
========================================================================
sizeof(ucontext_t) = 0x2e0
sizeof(sigset_t)   = 0x8
sizeof(stack_t)    = 0x18
sizeof(mcontext_t) = 0x288

Target thread :
========================================================================
Address of local variables :
&t = 0x140031a38
&i = 0x140031a30
&j = 0x140031a28

Signal handler :
========================================================================
catches signal 30 in thread 0x140006030
Pointer to context is pointing at 0x140031778

Listing of machine contexts taken :
========================================================================
    by                by                by                by
    getcontext()      handler           handler           getcontext
                                        with shift        within handler

------------------------------------------------------------------------
__XSE(sc_pc) :
    0x120001aa8       0x120000000       0x120001ab0       0x120001994      

__XSE(sc_sbase) :
    0x120000000       0x1               0x120000000       0x120000000      

__XSE(sc_regs)[30] :
    0x140031a18       0x0               0x140031a18       0x140031710      

The ucontext_t data stuctures listed as array of long
 0. 0x8               0x0               0xaaaaaaaaaaaaaab 0x8              
 1. 0x0               0x0               0xaaaaaaaaaaaaaaa 0x0              
 2. 0x0               0x120001ab0       0xaaaaaaaaaaaaaa9 0x20000000        
 3. 0x120000000       0x8               0xaaaaaaaaaaaaaa8 0x120000000      
 4. 0x0               0x0               0xaaaaaaaaaaaaaa7 0x0              
 5. 0x4000            0x3ffc00931b0     0xaaaaaaaaaaaaaa6 0x4000            
 6. 0x0               0x4000            0x0               0x0              
 7. 0x0               0x0               0x0               0x20000000        
 8. 0x120001aa8       0x120000000       0x120001ab0       0x120001994      
 9. 0x8               0x0               0x8               0x8              
10. 0x0               0x0               0x0               0x0              
11. 0x0               0x3ffc0080c50     0x3ffc00931b0     0x0              
12. 0x0               0x3ffc008e790     0x4000            0x0              
13. 0x0               0x24688642        0x0               0x0              
14. 0x0               0x140006030       0x120000000       0x0              
15. 0x0               0x0               0x0               0x0              
16. 0x0               0x1               0x0               0x0              
17. 0x0               0x3ffc0183400     0x3ffc0080c50     0x0              
18. 0x0               0x3ffc0189dc0     0x3ffc008e790     0x0              
19. 0x1400000d0       0x0               0x24688642        0x140031778      
20. 0x140006030       0x140000760       0x140006030       0x1e              
21. 0x0               0x140000790       0x0               0x140000010      
22. 0x1               0x1               0x1               0x1              
23. 0x3ffc0183400     0x0               0x3ffc0183400     0x3ffc0183400    
24. 0x3ffc0189dc0     0x0               0x3ffc0189dc0     0x3ffc0189dc0    
25. 0x0               0x1000000         0x0               0x0              
26. 0x0               0x0               0x140000760       0x0              
27. 0x0               0x3ff8010bb68     0x140000790       0x0              
28. 0x0               0x0               0x1               0x0              
29. 0x0               0x0               0x0               0x0              
30. 0x0               0x120001aa8       0x0               0x0              
31. 0x0               0x3ff80576a40     0x1000000         0x0              
32. 0x0               0x0               0x0               0x0              
33. 0x0               0x3ffc00931b0     0x3ff8010bb68     0x0              
34. 0x0               0x140031a18       0x0               0x0              
35. 0x0               0x0               0x0               0x0              
36. 0x120001aa8       0x1               0x120001aa8       0x120001994      
37. 0x0               0x0               0x3ff80576a40     0x0              
38. 0x0               0x0               0x0               0x0              
39. 0x1400085c0       0x0               0x3ffc00931b0     0x1400085c0      
40. 0x140031a18       0x0               0x140031a18       0x140031710      
41. 0xacedbade        0x0               0x0               0xacedbade        
42. 0x0               0x0               0x1               0x0              
43. 0x0               0x0               0x0               0x0              
44. 0x0               0x0               0x0               0x0              
45. 0x0               0x0               0x0               0x0              
46. 0x0               0x0               0x0               0x0              
47. 0x0               0x0               0x0               0x0              
48. 0x0               0x0               0x0               0x0              
49. 0x0               0x0               0x0               0x0              
50. 0x0               0x0               0x0               0x0              
51. 0x0               0x0               0x0               0x0              
52. 0x0               0x0               0x0               0x0              
53. 0x0               0x0               0x0               0x0              
54. 0x0               0x0               0x0               0x0              
55. 0x0               0x0               0x0               0x0              
56. 0x0               0x0               0x0               0x0              
57. 0x0               0x0               0x0               0x0              
58. 0x0               0x0               0x0               0x0              
59. 0x0               0x0               0x0               0x0              
60. 0x0               0x0               0x0               0x0              
61. 0x0               0x0               0x0               0x0              
62. 0x0               0x0               0x0               0x0              
63. 0x0               0x0               0x0               0x0              
64. 0x0               0x0               0x0               0x0              
65. 0x0               0x0               0x0               0x0              
66. 0x0               0x0               0x0               0x0              
67. 0x0               0x0               0x0               0x0              
68. 0x0               0x0               0x0               0x0              
69. 0x0               0x800000000000000 0x0               0x0              
70. 0x0               0x0               0x0               0x0              
71. 0x0               0x120001aa8       0x0               0x0              
72. 0x0               0x3ff80576a40     0x0               0x0              
73. 0x0               0x4000            0x0               0x0              
74. 0x0               0x120000000       0x0               0x0              
75. 0x0               0x0               0x800000000000000 0x0              
76. 0x0               0x0               0x0               0x0              
77. 0x0               0x0               0x120001aa8       0x0              
78. 0x0               0x140000760       0x3ff80576a40     0x0              
79. 0x4000            0x140000790       0x4000            0x4000            
80. 0x120000000       0x1               0x120000000       0x120000000      
81. 0x0               0x140000760       0x0               0x0              
82. 0x0               0x1400085c0       0x0               0x0              
83. 0x0               0xaaaaaaaaaaaaaa2 0x0               0x0              
84. 0x0               0xaaaaaaaaaaaaaa1 0x140000760       0x0              
85. 0x0               0xaaaaaaaaaaaaaa0 0x140000790       0x0              
86. 0x0               0x12344321        0x1               0x0              
87. 0x0               0x24688642        0x140000760       0x0              
88. 0x0               0x140031700       0x1400085c0       0x0              
89. 0x0               0x0               0xaaaaaaaaaaaaaa2 0x0              
90. 0x0               0x0               0xaaaaaaaaaaaaaa1 0x0              
91. 0x0               0x0               0xaaaaaaaaaaaaaa0 0x0              
End of listing

Figure 1        Output listing of program handler_3rd_argument_threaded_version.c

From the listing of output from the program, the following are observed:
1.      Not enough room for a ucontext_t object to fit in the place where it is documented to be in:
The 3rd argument of the signal handler is found to be pointing at a location on the stack of the target thread.
A local variable long int j, is occupying the location starting at 0x140031a28
The 3rd argument of the signal handler is pointing at address 0x140031778.
The two addresses differ by  0x140031a28 - 0x140031778 = 0x2b0.
The size of a ucontext_t structure is 0x2e0.
An entire ucontext_t object can not have been placed there, not even excluding its last five long integer fillers.
2.      The data in the suggested location do not look reasonable.
The 8th line of the listing, when the object is interpreted as a ucontext_t structure, represents the program counter.  For the object pointed to by the 3rd argument of the signal handler, the value is 0x120000000.  According to information obtained via getcontext(), that value is the address of the base of the main stack, and the program counter can not be pointing at that address.
3.      Things look fine if column 2 is shifted down by 48 bytes.
For the lines listed, columns 1, 3 and 4 are in line. Just to mention a few as lines 9, 23, 24, not to mention so many cohering zeros.  Column 3 is definitely a better match to columns 1 and 4 than column 2 is.
4.      The trace of a smaller object is found in that place.
Only 0x298 bytes of the coloured region are overwritten.  According to the colours remaining in the first 6 long integers in the 3rd column, overwriting is found to start exactly from the location being pointed to by the 3rd argument of the signal handler, not a single byte before.  On the other end, the last 72 bytes of the supposed ucontext_t structure in the 2nd column are untouched.  Amongst these 72 bytes, the 24 bytes adjacent to the overwritten region keep their colour.  The bytes next to them are local variables of thread target.  In the program, the thread's local variable j keeps a constant value 0x12344321 throughout the execution, and i is fed with j doubled.  Both values are untouched and appear at the end of the listing of ucontext_t variable No.2, as lines 86 and 87 respectively.
5.      When the 3rd argument of the signal handler is used as a pointer to an mcontext_t structure, rolling back, via a signal handler, to the execution context checkpointed via a call to the getcontext function works fine.

4       Interpretation
A tentative interpretation of what has been observed so far is that the 3rd argument of the signal handler in Digital Unix 4.0 is pointing at a mcontext_t structure rather than a ucontext_t structure.
With the size of an mcontext_t structure being 0x288 bytes, this interpretation does not upset the upper bound established by observation 1 for the size of the object placed there.

This interpretation is compatible with observation 4 in the sense that the entire mcontext_t object has been written there, together with 16 bytes of other data with unknown identity.

The 5th member of a ucontext_t structure is an mcontext_t structure.  The sum of the sizes of the 4 preceding members is 8 + 8 + 8 +24 = 48 bytes.  For the 3rd column, by starting copying 48 bytes early, the mcontext_t structure expected by this interpretation is copied exactly to the mcontext_t member of the ucontext_t variable 3,  so ucontext_t variable 3 offers good match to ucontext_t variable 1 and 4, which are obtained reliably via getcontext() function.  Hence this interpretation is compatible to observation 3.

An mcontext_t structure put in the place of a ucontext_t structure does not look right.  Hence this interpretation is compatible to observation 2.
5       Conclusion
It is clear from both content validity and size considerations that the 3rd argument of a signal handler in Digital Unix 4.0 is not pointing to a ucontext_t structure.  However, the hypothesis that it is pointing to an mcontext_t structure is not supported with like strength.  The interpretation above corresponds to a workable solution.  Its validity draws support both from the matching structural patterns and from the fact that it works on a handful of benchmark applications used to test a software library I am working on in my study.

 
 
 

1. Digital Unix 4.0 bug on DEC Alpha ?

You are getting the sigcontext structure (which is a typedef for
mcontext_t.) This is default behaviour for Digital Unix. If you
want the ucontext_t structure you must use the XPG4-UNIX standard
which is achieved by compiling with -std1 -D_XOPEN_SOURCE_EXTENDED
(see 'man standards' and 'man sigaction' for further info.)

... which is completely useless to us, because MSWord is a proprietary
format and we unixers have no viewer to read it (nor do we want such
a viewer.) Plain text is preferred, but if you insist on nice formatting
then Postscript is acceptable.

2. PHP3 : preg_replace question : Help ! please !

3. Development Tools for Dec Alpha/Digital Unix 4.0?

4. Question about NFS!

5. Elm for DEC Alpha DEC Unix

6. Choosing laptop (...Linux, neoMagic)

7. Digital Alpha DEC Alphastation 500/266

8. How printing A3 size HP printer at SunOS ?

9. sna (lu6.2) data transfer between IBM mainframe and Digital DEC/ALPHA

10. Digital Alpha OSF1 4.0 and Loopback interface

11. Red Hat Linux 4.0 for DEC Alpha FS

12. Help: Install Redhat 4.0 on DEC Alpha Multia Box

13. AMD running on OSF/1 (dec/alpha) version 3.2 run on OSF/1 4.0?