This is a resync of the last patch for 2.5.34 that resulted from this discussion
(not the original patch):
http://lwn.net/Articles/1642/
The only change was readding the reference to task_info in the beginning of
common_interrupt. It had been dropped when we stopped messing with
preempt_count there.
I've beaten this thing with my normal array of Specweb tests and it is behaving
so far. I've booted on an 8-way with and without SMP.
part of Ben's original message:
> plus a seperate 4KB irq stack for use by interrupts. There are a couple
> of reasons for doing this: 4KB stacks put less pressure on the VM
> subsystem, reduces the overall memory usage for systems with large
> numbers of tasks, and increases the reliability of the system when
> under heavy irq load by provide a fixed stack size for interrupt
> handlers that other kernel code will not eat into.
> The interrupt stacks are stackable, so we could use multiple
> 4KB irq stacks. The thread_info structure is included in each
> interrupt stack, and has the current pointer copied into it upon
> entry.
Dave Hansen
haveb...@us.ibm.com
[
irqstack-2.5.34-1.patch 15K ]
# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.624 -> 1.626
# arch/i386/kernel/process.c 1.40 -> 1.41
# arch/i386/kernel/irq.c 1.18 -> 1.19
# arch/i386/kernel/head.S 1.15 -> 1.16
# include/asm-i386/thread_info.h 1.7 -> 1.8
# include/asm-i386/page.h 1.16 -> 1.17
# arch/i386/kernel/entry.S 1.41 -> 1.43
# arch/i386/config.in 1.47 -> 1.48
# arch/i386/Makefile 1.17 -> 1.18
# arch/i386/kernel/i386_ksyms.c 1.30 -> 1.31
# arch/i386/kernel/smpboot.c 1.33 -> 1.34
# arch/i386/boot/compressed/misc.c 1.7 -> 1.8
# arch/i386/kernel/init_task.c 1.6 -> 1.7
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/09/09 haveblue@elm3b96.(none) 1.625
# Import patch v2.5.20-stack-A2.diff
# --------------------------------------------
# 02/09/11 haveblue@elm3b96.(none) 1.626
# don't fetch things out of ebx when it has garbage in it :(
# --------------------------------------------
#
diff -Nru a/arch/i386/Makefile b/arch/i386/Makefile
--- a/arch/i386/Makefile Wed Sep 11 15:30:18 2002
+++ b/arch/i386/Makefile Wed Sep 11 15:30:18 2002
@@ -85,6 +85,10 @@
CFLAGS += -march=i586
endif
+ifdef CONFIG_X86_STACK_CHECK
+CFLAGS += -p
+endif
+
HEAD := arch/i386/kernel/head.o arch/i386/kernel/init_task.o
SUBDIRS += arch/i386/kernel arch/i386/mm arch/i386/lib
diff -Nru a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
--- a/arch/i386/boot/compressed/misc.c Wed Sep 11 15:30:18 2002
+++ b/arch/i386/boot/compressed/misc.c Wed Sep 11 15:30:18 2002
@@ -377,3 +377,7 @@
if (high_loaded) close_output_buffer_if_we_run_high(mv);
return high_loaded;
}
+
+/* We don't actually check for stack overflows this early. */
+__asm__(".globl mcount ; mcount: ret\n");
+
diff -Nru a/arch/i386/config.in b/arch/i386/config.in
--- a/arch/i386/config.in Wed Sep 11 15:30:18 2002
+++ b/arch/i386/config.in Wed Sep 11 15:30:18 2002
@@ -35,6 +35,7 @@
#
# Define implied options from the CPU selection here
#
+define_bool CONFIG_X86_HAVE_CMOV n
if [ "$CONFIG_M386" = "y" ]; then
define_bool CONFIG_X86_CMPXCHG n
@@ -91,18 +92,21 @@
define_bool CONFIG_X86_GOOD_APIC y
define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
define_bool CONFIG_X86_PPRO_FENCE y
+ define_bool CONFIG_X86_HAVE_CMOV y
fi
if [ "$CONFIG_MPENTIUMIII" = "y" ]; then
define_int CONFIG_X86_L1_CACHE_SHIFT 5
define_bool CONFIG_X86_TSC y
define_bool CONFIG_X86_GOOD_APIC y
define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+ define_bool CONFIG_X86_HAVE_CMOV y
fi
if [ "$CONFIG_MPENTIUM4" = "y" ]; then
define_int CONFIG_X86_L1_CACHE_SHIFT 7
define_bool CONFIG_X86_TSC y
define_bool CONFIG_X86_GOOD_APIC y
define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+ define_bool CONFIG_X86_HAVE_CMOV y
fi
if [ "$CONFIG_MK6" = "y" ]; then
define_int CONFIG_X86_L1_CACHE_SHIFT 5
@@ -116,6 +120,7 @@
define_bool CONFIG_X86_GOOD_APIC y
define_bool CONFIG_X86_USE_3DNOW y
define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+ define_bool CONFIG_X86_HAVE_CMOV y
fi
if [ "$CONFIG_MELAN" = "y" ]; then
define_int CONFIG_X86_L1_CACHE_SHIFT 4
@@ -132,6 +137,7 @@
if [ "$CONFIG_MCRUSOE" = "y" ]; then
define_int CONFIG_X86_L1_CACHE_SHIFT 5
define_bool CONFIG_X86_TSC y
+ define_bool CONFIG_X86_HAVE_CMOV y
fi
if [ "$CONFIG_MWINCHIPC6" = "y" ]; then
define_int CONFIG_X86_L1_CACHE_SHIFT 5
@@ -429,6 +435,7 @@
if [ "$CONFIG_HIGHMEM" = "y" ]; then
bool ' Highmem debugging' CONFIG_DEBUG_HIGHMEM
fi
+ bool ' Check for stack overflows' CONFIG_X86_STACK_CHECK
fi
endmenu
diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
--- a/arch/i386/kernel/entry.S Wed Sep 11 15:30:18 2002
+++ b/arch/i386/kernel/entry.S Wed Sep 11 15:30:18 2002
@@ -136,7 +136,7 @@
movl %ecx,CS(%esp) #
movl %esp, %ebx
pushl %ebx
- andl $-8192, %ebx # GET_THREAD_INFO
+ GET_THREAD_INFO_WITH_ESP(%ebx)
movl TI_EXEC_DOMAIN(%ebx), %edx # Get the execution domain
movl 4(%edx), %edx # Get the lcall7 handler for the domain
pushl $0x7
@@ -158,7 +158,7 @@
movl %ecx,CS(%esp) #
movl %esp, %ebx
pushl %ebx
- andl $-8192, %ebx # GET_THREAD_INFO
+ GET_THREAD_INFO_WITH_ESP(%ebx)
movl TI_EXEC_DOMAIN(%ebx), %edx # Get the execution domain
movl 4(%edx), %edx # Get the lcall7 handler for the domain
pushl $0x27
@@ -334,7 +334,39 @@
ALIGN
common_interrupt:
SAVE_ALL
+
+ GET_THREAD_INFO(%ebx)
+ movl TI_IRQ_STACK(%ebx),%ecx
+ movl TI_TASK(%ebx),%edx
+ movl %esp,%eax
+ leal (THREAD_SIZE-4)(%ecx),%esi
+ testl %ecx,%ecx # is there a valid irq_stack?
+
+ # switch to the irq stack
+#ifdef CONFIG_X86_HAVE_CMOV
+ cmovnz %esi,%esp
+#else
+ jnz 1f
+ mov %esi,%esp
+1:
+#endif
+
+ # update the task pointer in the irq stack
+ GET_THREAD_INFO(%esi)
+ movl %edx,TI_TASK(%esi)
+
call do_IRQ
+
+ movl %eax,%esp # potentially restore non-irq stack
+
+ # copy flags from the irq stack back into the task's thread_info
+ # %esi is saved over the do_IRQ call and contains the irq stack
+ # thread_info pointer
+ # %ebx contains the original thread_info pointer
+ movl TI_FLAGS(%esi),%eax
+ movl $0,TI_FLAGS(%esi)
+ LOCK orl %eax,TI_FLAGS(%ebx)
+
jmp ret_from_intr
#define BUILD_INTERRUPT(name, nr) \
@@ -506,6 +538,61 @@
pushl $0
pushl $do_spurious_interrupt_bug
jmp error_code
+
+#ifdef CONFIG_X86_STACK_CHECK
+.data
+ .globl stack_overflowed
+stack_overflowed:
+ .long 0
+
+.text
+
+ENTRY(mcount)
+ push %eax
+ movl $(THREAD_SIZE - 1),%eax
+ andl %esp,%eax
+ cmpl $0x200,%eax /* 512 byte danger zone */
+ jle 1f
+2:
+ popl %eax
+ ret
+1:
+ lock; btsl $0,stack_overflowed /* Prevent reentry via printk */
+ jc 2b
+
+ # switch to overflow stack
+ movl %esp,%eax
+ movl $(stack_overflow_stack + THREAD_SIZE - 4),%esp
+
+ pushf
+ cli
+ pushl %eax
+
+ # push eip then esp of error for stack_overflow_panic
+ pushl 4(%eax)
+ pushl %eax
+
+ # update the task pointer and cpu in the overflow stack's thread_info.
+ GET_THREAD_INFO_WITH_ESP(%eax)
+ movl TI_TASK(%eax),%ebx
+ movl %ebx,stack_overflow_stack+TI_TASK
+ movl TI_CPU(%eax),%ebx
+ movl %ebx,stack_overflow_stack+TI_CPU
+
+ # never neverland
+ call stack_overflow_panic
+
+ addl $8,%esp
+
+ popf
+ popl %eax
+ movl %eax,%esp
+ popl %eax
+ movl $0,stack_overflowed
+ ret
+
+#warning stack check enabled
+#endif
.data
ENTRY(sys_call_table)
diff -Nru a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
--- a/arch/i386/kernel/head.S Wed Sep 11 15:30:18 2002
+++ b/arch/i386/kernel/head.S Wed Sep 11 15:30:18 2002
@@ -15,6 +15,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
+#include <asm/thread_info.h>
#define OLD_CL_MAGIC_ADDR 0x90020
#define OLD_CL_MAGIC 0xA33F
@@ -305,7 +306,7 @@
ret
ENTRY(stack_start)
- .long init_thread_union+8192
+ .long init_thread_union+THREAD_SIZE
.long __KERNEL_DS
/* This is the default interrupt "handler" :-) */
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c Wed Sep 11 15:30:18 2002
+++ b/arch/i386/kernel/i386_ksyms.c Wed Sep 11 15:30:18 2002
@@ -172,3 +172,8 @@
EXPORT_SYMBOL(is_sony_vaio_laptop);
EXPORT_SYMBOL(__PAGE_KERNEL);
+
+#ifdef CONFIG_X86_STACK_CHECK
+extern void mcount(void);
+EXPORT_SYMBOL(mcount);
+#endif
diff -Nru a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
--- a/arch/i386/kernel/init_task.c Wed Sep 11 15:30:18 2002
+++ b/arch/i386/kernel/init_task.c Wed Sep 11 15:30:18 2002
@@ -13,6 +13,14 @@
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
struct mm_struct init_mm = INIT_MM(init_mm);
+union thread_union init_irq_union
+ __attribute__((__section__(".data.init_task")));
+
+#ifdef CONFIG_X86_STACK_CHECK
+union thread_union stack_overflow_stack
+ __attribute__((__section__(".data.init_task")));
+#endif
+
/*
* Initial thread structure.
*
@@ -22,7 +30,15 @@
*/
union thread_union init_thread_union
__attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+ { {
+ task: &init_task,
+ exec_domain: &default_exec_domain,
+ flags: 0,
+ cpu: 0,
+ addr_limit: KERNEL_DS,
+ irq_stack: &init_irq_union,
+ } };
+
/*
* Initial task structure.
diff -Nru a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
--- a/arch/i386/kernel/irq.c Wed Sep 11 15:30:18 2002
+++ b/arch/i386/kernel/irq.c Wed Sep 11 15:30:18 2002
@@ -311,7 +311,8 @@
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-asmlinkage unsigned int do_IRQ(struct pt_regs regs)
+struct pt_regs *do_IRQ(struct pt_regs *regs) __attribute__((regparm(1)));
+struct pt_regs *do_IRQ(struct pt_regs *regs)
{
/*
* We ack quickly, we don't want the irq controller
@@ -323,7 +324,7 @@
* 0 return value means that this irq is already being
* handled by some other CPU. (or is disabled)
*/
- int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */
+ int irq = regs->orig_eax & 0xff; /* high bits used in ret_from_ code */
int cpu = smp_processor_id();
irq_desc_t *desc = irq_desc + irq;
struct irqaction * action;
@@ -373,7 +374,7 @@
*/
for (;;) {
spin_unlock(&desc->lock);
- handle_IRQ_event(irq, ®s, action);
+ handle_IRQ_event(irq, regs, action);
spin_lock(&desc->lock);
if (likely(!(desc->status & IRQ_PENDING)))
@@ -392,7 +393,7 @@
irq_exit();
- return 1;
+ return regs;
}
/**
diff -Nru a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
--- a/arch/i386/kernel/process.c Wed Sep 11 15:30:18 2002
+++ b/arch/i386/kernel/process.c Wed Sep 11 15:30:18 2002
@@ -438,6 +438,16 @@
extern void show_trace(unsigned long* esp);
+#ifdef CONFIG_X86_STACK_CHECK
+void stack_overflow_panic(void *esp, void *eip)
+{
+ printk("stack overflow from %p. esp:
...
read more »