x86 boot enhancements, Clean up the 32bit entry points 6/11

x86 boot enhancements, Clean up the 32bit entry points 6/11

Post by Eric W. Biederm » Fri, 19 Apr 2002 02:10:13



Linus please apply,

In general, allow any kernel entry point to work given any set of
initial register values, while saving the original register
values so the C code can do something with them if we desire.

- trampoline.S fix comments, and enter the kernel at
  secondary_startup_32 instead of startup_32
- trampoline.S fix gdt_48 to have the correct gdt limit
- Save all of the registers we get from any 32bit entry point,
  and don't assume they have any particular value.
- head.S split up startup_32
  - secondary_startup_32 handles the SMP case
  - move finding the command line to startup.c
  - Don't copy the kernel parameters to the initial_zero_page,
    instead just pass setup.c where they are located.
- Separate the segments used by setup.S from the rest of the kernel.
  This way a bootloader can continue to make assumptions about
  which segments setup.S uses while the rest of the kernel
  can do whatever is convenient.
- Move boot specific defines into boot.h

Eric

diff -uNr linux-2.5.8.boot.heap/arch/i386/boot/compressed/head.S linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/compressed/head.S
--- linux-2.5.8.boot.heap/arch/i386/boot/compressed/head.S      Wed Jul  5 13:03:12 2000
+++ linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/compressed/head.S       Wed Apr 17 00:37:46 2002
@@ -25,24 +25,30 @@

 #include <linux/linkage.h>
 #include <asm/segment.h>
+#include <asm/boot.h>

        .globl startup_32

 startup_32:
        cld
        cli
-       movl $(__KERNEL_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-       movl %eax,%fs
-       movl %eax,%gs

-       lss SYMBOL_NAME(stack_start),%esp
-       xorl %eax,%eax
-1:     incl %eax               # check that A20 really IS enabled
-       movl %eax,0x000000      # loop forever if it isn't
-       cmpl %eax,0x100000
-       je 1b
+       /*
+        * Save the initial registers
+        */
+       movl %eax, eax
+       movl %ebx, ebx
+       movl %ecx, ecx
+       movl %edx, edx
+       movl %esi, esi
+       movl %edi, edi
+       movl %esp, esp
+       movl %ebp, ebp
+
+       /*
+        * Setup the stack
+        */
+       movl SYMBOL_NAME(stack_start), %esp

 /*
  * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
@@ -66,16 +72,10 @@
  */
        subl $16,%esp   # place for structure on the stack
        movl %esp,%eax
-       pushl %esi      # real mode pointer as second arg
        pushl %eax      # address of structure as first arg
        call SYMBOL_NAME(decompress_kernel)
        orl  %eax,%eax
-       jnz  3f
-       popl %esi       # discard address
-       popl %esi       # real mode pointer
-       xorl %ebx,%ebx
-       ljmp $(__KERNEL_CS), $0x100000
-
+       jz out
 /*
  * We come here, if we were loaded high.
  * We need to move the move-in-place routine down to 0x1000
@@ -83,8 +83,21 @@
  * which we got from the stack.
  */
 3:
-       movl $move_routine_start,%esi
-       movl $0x1000,%edi
+       /* Relocate the move routine */
+       movl $move_routine_start,%esi   #src
+       movl %eax,%edi                  #dest
+       movl %eax, %ebp                 #saved dest
+       movl %edi, %eax
+       subl %esi, %eax                 # The relocation factor
+       addl %eax, reloc1
+       addl %eax, reloc2
+       addl %eax, reloc3
+       addl %eax, reloc4
+       addl %eax, reloc5
+       addl %eax, reloc6
+       addl %eax, reloc7
+       addl %eax, reloc8
+       addl %eax, reloc9
        movl $move_routine_end,%ecx
        subl %esi,%ecx
        addl $3,%ecx
@@ -93,20 +106,23 @@
        rep
        movsl

+       /* Load it's arguments and jump to the move routine */
        popl %esi       # discard the address
-       popl %ebx       # real mode pointer
        popl %esi       # low_buffer_start
        popl %ecx       # lcount
        popl %edx       # high_buffer_start
        popl %eax       # hcount
-       movl $0x100000,%edi
+       movl $HIGH_BASE,%edi
        cli             # make sure we don't get interrupted
-       ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+       jmpl *%ebp

 /*
  * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
+ * if we were high loaded. This _must_ be PIC-code !
+ * Or it must be anotated with lables so it can be manually relocated.
  */
+       .globl move_routine_start, move_routine_end
+       .balign 4
 move_routine_start:
        movl %ecx,%ebp
        shrl $2,%ecx
@@ -122,7 +138,35 @@
        shrl $2,%ecx
        rep
        movsl
-       movl %ebx,%esi  # Restore setup pointer
-       xorl %ebx,%ebx
-       ljmp $(__KERNEL_CS), $0x100000
+out:
+       .byte 0xa1              # movl eax,  %eax
+reloc1:        .long eax
+       .byte 0x8b, 0x1d        # movl ebx,  %ebx
+reloc2:        .long ebx
+       .byte 0x8b, 0x0d        # movl ecx,  %ecx
+reloc3:        .long ecx
+       .byte 0x8b, 0x15        # movl edx, %edx
+reloc4:        .long edx
+       .byte 0x8b, 0x35        # movl esi, %esi
+reloc5:        .long esi
+       .byte 0x8b, 0x3d        # movl edi, %edi
+reloc6:        .long edi
+       .byte 0x8b, 0x25        # movl esp, %esp
+reloc7:        .long esp
+       .byte 0x8b, 0x2d        # movl ebp, %ebp
+reloc8:        .long ebp
+       .byte 0xff, 0x25        # jmpl *(kernel_start)
+reloc9:        .long kernel_start
+       .balign 4
+ENTRY(initial_regs)
+eax:   .long 0x12345678 /* eax */
+ebx:   .long 0x12345678 /* ebx */
+ecx:   .long 0x12345678 /* ecx */
+edx:   .long 0x12345678 /* edx */
+esi:   .long 0x12345678 /* esi */
+edi:   .long 0x12345678 /* edi */
+esp:   .long 0x12345678 /* esp */
+ebp:   .long 0x12345678 /* ebp */
+kernel_start:
+       .long HIGH_BASE
 move_routine_end:
diff -uNr linux-2.5.8.boot.heap/arch/i386/boot/compressed/misc.c linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/compressed/misc.c
--- linux-2.5.8.boot.heap/arch/i386/boot/compressed/misc.c      Tue Apr 16 11:14:30 2002
+++ linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/compressed/misc.c       Wed Apr 17 00:37:46 2002
@@ -16,6 +16,7 @@
 #include <linux/apm_bios.h>
 #include <asm/e820.h>
 #include <asm/boot_param.h>
+#include <asm/boot.h>

 /*
  * gzip declarations
@@ -111,10 +112,11 @@
 static long free_mem_ptr = (long)&end;
 static long free_mem_end_ptr;

-#define INPLACE_MOVE_ROUTINE  0x1000
-#define LOW_BUFFER_START      0x2000
-#define LOW_BUFFER_MAX       0x90000
-#define HEAP_SIZE             0x3000
+/* Decompressor constants */
+#define HEAP_SIZE         0x003000
+static unsigned long move_routine;
+extern unsigned char move_routine_end[], move_routine_start[];
+
 static unsigned int low_buffer_end, low_buffer_size;
 static int high_loaded =0;
 static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
@@ -170,11 +172,15 @@
                vidmem[i] = ' ';
 }

+static int vid_initialized = 0;
 static void puts(const char *s)
 {
        int x,y,pos;
        char c;

+       if (!vid_initialized)
+               return;
+
        x = real_mode->screen.info.orig_x;
        y = real_mode->screen.info.orig_y;

@@ -209,6 +215,7 @@

 static void vid_puts_init(void)
 {
+       vid_initialized = 1;
        if (real_mode->screen.info.orig_video_mode == 7) {
                vidmem = (char *) 0xb0000;
                vidport = 0x3b4;
@@ -315,8 +322,10 @@

 struct {
        long * a;
-       short b;
-       } stack_start = { & user_stack [STACK_SIZE] , __KERNEL_DS };
+       } stack_start = { & user_stack [STACK_SIZE] };
+
+extern struct initial_regs32 initial_regs;
+extern __u32 kernel_start;

 static void setup_normal_output_buffer(void)
 {
@@ -333,11 +342,13 @@
 static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
 {
        high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
+       move_routine = LOW_BASE;
        if (mem_k < (4*1024))  error("Less than 4MB of memory.\n");
-       mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
-       low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
-         ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
-       low_buffer_size = low_buffer_end - LOW_BUFFER_START;
+       mv->low_buffer_start = output_data =
+               (char *)move_routine + (move_routine_end - move_routine_start);
+       low_buffer_end = ((unsigned int)real_mode > LOW_MAX
+         ? LOW_MAX : (unsigned int)real_mode) & ~0xfff;
+       low_buffer_size = low_buffer_end - (unsigned long)mv->low_buffer_start;
        high_loaded = 1;
        free_mem_end_ptr = (long)high_buffer_start;
        if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
@@ -346,6 +357,7 @@
        }
        else mv->hcount = -1;
        mv->high_buffer_start = high_buffer_start;
+       if ((ulg)output_data >= low_buffer_end) output_data=high_buffer_start;
 }

 static void close_output_buffer_if_we_run_high(struct moveparams *mv)
@@ -361,21 +373,28 @@
 }

-asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage unsigned long decompress_kernel(struct moveparams *mv)
 {
-       real_mode = rmode;
-
-       vid_puts_init();
+       /* If we don't know better assume we can't use any
+        * real mode memory, and we have enough protected mode memory.
+        */
+       real_mode = 0;
+       if ((initial_regs.ebp == ENTRY16) || (initial_regs.ebp == ENTRY32)) {
+               real_mode = (struct boot_params *)initial_regs.esi;
+       }
+       if (initial_regs.ebp == ENTRY16) {
+               vid_puts_init();

-       mem_k = real_mode->screen.overlap.ext_mem_k;
+               mem_k = real_mode->screen.overlap.ext_mem_k;
 #ifndef STANDARD_MEMORY_BIOS_CALL
-       if (real_mode->alt_mem_k > mem_k) {
-               mem_k = real_mode->alt_mem_k;
-       }
+               if (real_mode->alt_mem_k > mem_k) {
+                       mem_k = real_mode->alt_mem_k;
+               }
 #endif
-       mem_k += 1024;
+               mem_k += 1024;
+       }

-       if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
+       if (free_mem_ptr < HIGH_BASE) setup_normal_output_buffer();
        else setup_output_buffer_if_we_run_high(mv);

        makecrc();
@@ -383,5 +402,5 @@
        gunzip();
        puts("Ok, booting the kernel.\n");
        if (high_loaded) close_output_buffer_if_we_run_high(mv);
-       return high_loaded;
+       return move_routine;
 }
diff -uNr linux-2.5.8.boot.heap/arch/i386/boot/setup.S linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/setup.S
--- linux-2.5.8.boot.heap/arch/i386/boot/setup.S        Wed Apr 17 00:23:51 2002
+++ linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/setup.S Wed Apr 17 00:37:46 2002
@@ -64,6 +64,12 @@

 #define DELTA_BOOTSECT 512

+/* Segments used by setup.S */
+#define __SETUP_CS      0x10
+#define __SETUP_DS      0x18
+#define __SETUP_REAL_CS 0x20
+#define __SETUP_REAL_DS 0x28
+
 INITSEG  = DEF_INITSEG         # 0x9000, we move boot here, out of the way
 SYSSEG   = DEF_SYSSEG          # 0x1000, system loaded at 0x10000 (65536).
 SETUPSEG = DEF_SETUPSEG                # 0x9020, this is the current segment
@@ -784,13 +790,22 @@
        jmp     flush_instr

 flush_instr:
-       xorw    %bx, %bx                        # Flag to indicate a boot
        xorl    %esi, %esi                      # Pointer to real-mode code
        movw    %cs, %si
        subw    $DELTA_INITSEG, %si
        shll    $4, %esi                        # Convert to 32-bit pointer
+       movl    $ENTRY16, %ebp                  # Magic to
...

read more »

 
 
 

x86 boot enhancements, Clean up the 32bit entry points 6/11

Post by Etienne Lorrai » Fri, 19 Apr 2002 18:50:05


You propose:
----------------------
diff -uNr linux-2.5.8.boot.heap/arch/i386/boot/compressed/head.S \

linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/compressed/head.S
--- linux-2.5.8.boot.heap/arch/i386/boot/compressed/head.S      Wed Jul  5
13:03:12 2000
+++ linux-2.5.8.boot.clean_32bit_entries/arch/i386/boot/compressed/head.S
Wed Apr 17 \

 #include <linux/linkage.h>
 #include <asm/segment.h>
+#include <asm/boot.h>

        .globl startup_32

 startup_32:
        cld
        cli
-       movl $(__KERNEL_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-       movl %eax,%fs
-       movl %eax,%gs

-       lss SYMBOL_NAME(stack_start),%esp
-       xorl %eax,%eax
-1:     incl %eax               # check that A20 really IS enabled
-       movl %eax,0x000000      # loop forever if it isn't
-       cmpl %eax,0x100000
-       je 1b
+       /*
+        * Save the initial registers
+        */
+       movl %eax, eax
+       movl %ebx, ebx
-------------------

You want to completely change the protected mode entry point; that does
bother me, and you know why (gujin). It is a simple (as simple as possible)
interface that has been available for a _very_ long time.

  I would say:
- Please initialise registers (segment registers) before using them; it
 is already complex enough. A bug there will be really difficult to find.
 Moreover, that reminds me of another OS using a register (%bx) without
 initialising it first.
- Please keep the 'lss SYMBOL_NAME(stack_start),%esp' around, it is the
_only_ way to know if a kernel is "loaded low" or "loaded high", just in
case you want to write a bootloader which loads _any_ kernel, even 1.x
- Please stay compatible.

  Etienne.

___________________________________________________________

Yahoo! Mail : http://fr.mail.yahoo.com
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

x86 boot enhancements, Clean up the 32bit entry points 6/11

Post by Eric W. Biederm » Fri, 19 Apr 2002 21:40:07



> You want to change completely the protected mode entry point, that does
> bother me, you know why (gujin). It is a simple (as simple as possible)
> interface, available from a _very_ long time.

*Cough*  
It is impossibly broken to use, and it has long been said it is
unsupported.  That means when someone breaks it you get to keep the
pieces.

Quote:>   I would say:
> - Please initialise registers (segment registers) before using them, it
>  is already complex enough. A bug there will be really difficult to find.
>  Moreover that remind me another OS using registers (%bx) without
>  initialising it first.

I do.  See setup.S

I could probably change that code sequence to only rely on %cs having
a sane value.  But it is much saner to rely on %cs, %ds, and
%es having sane values than to rely on the presence of a global
descriptor table with the descriptors you need.  Especially since
after loading a segment register it is safe to throw away the
descriptor table, and still use the segment.

Also, there is only one set of sane protected mode segment values to
use.  Do you know anyone who doesn't use flat 4G segments with a base
of 0?  (While in protected mode.)

Arguing against using registers without initialization might
be credible if you weren't also arguing for using a global
descriptor table and %ebx and %esi without initialization.

As for bugs I have run that code with %cs, %ds, and %es having totally
different descriptor values from an overwritten gdt and it worked just
fine.

Quote:> - Please keep the 'lss SYMBOL_NAME(stack_start),%esp' around, it is the
> _only_ way to know if a kernel is "loaded low" or "loaded high", just in
> case you want to write a bootloader which loads _any_ kernel, even 1.x

????  I guess if you skim forward and find that instruction, and then
read the address of the stack_start symbol, you could figure that out.
But that instruction hasn't been at a constant offset, and there is a
much cleaner way of detecting that, examining the bit in the kernel
header that tells you explicitly.

Quote:> - Please stay compatible.

I bumped the kernel boot protocol, so auto detect should be trivial.

I did not break any bootloader using a supported interface.

I added a supportable 32bit entry point.

The interface has not been fixed in time so I don't know what you want
me to be compatible with.

Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

x86 boot enhancements, Clean up the 32bit entry points 6/11

Post by Etienne Lorrai » Sat, 20 Apr 2002 02:10:05


 Seems that previous message did not go through, rewrite.

 I am sorry, I did not check your patch carefully enough.
 You are speaking of: arch/i386/boot/compressed/head.S
 I am speaking of:    arch/i386/kernel/head.S

 Gujin completely skips arch/i386/boot/compressed/* and really
 boots the file '$$tmppiggy.gz' (see line 44 of the file
 arch/i386/boot/compressed/Makefile).

 So you can do whatever you want with the "first" 32 bits entry point,
 I am just concerned by the "second" kernel 32 bits entry point, in
 arch/i386/kernel/head.S

 I still have a problem detecting the size of your decompressor, and that
 is what I use the "lss" instruction for.
 This "lss SYMBOL_NAME(stack_start),%esp" gives an access to the symbol
 'stack_start', so it is quite easy to find back the GZIP signature
 of the initial '$$tmppiggy.gz' in what I call my "compatibility" mode,
 i.e. booting the legacy vmlinuz files - and skipping all of the real mode
 code and the decompressor code.

 This "lss" line has not always been at the same offset, but it has been
 around since maybe even the 0.01 kernel, and it is quite easy to find it
 from its hexadecimal form (function vmlinuz_header_treat() in vmlinuz.c of
 Gujin).

 The loaded high/loaded low stuff is just to know if I have to remove
 0x100000 or 0x1000 from this symbol to have the number of bytes
 to skip on the file.
 By the way, the bit in the kernel header is set by the bootloader to say
 where it has loaded the kernel, not by the compiler/linker chain.

 So is it possible to write somewhere how much code to skip or the offset
 of the kernel GZIP signature?
 Something like:
  jmp next
  lss SYMBOL_NAME(stack_start),%esp
next:
 Would make me really happy, but is dirty.
 Changing the 'tmppiggy.lnk' in the Makefile can be done, but the value
 (to know the length of the decompressor code) has to be _before_ the code
 itself in the raw file.
 Else whatever signature at whatever fixed address with the code+rodata
 size following would make me happy.

  Sorry again for the confusion,
  Etienne.

___________________________________________________________

Yahoo! Mail : http://fr.mail.yahoo.com
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

x86 boot enhancements, Clean up the 32bit entry points 6/11

Post by Eric W. Biederm » Sat, 20 Apr 2002 03:30:06



>  Seems that previous message did not go through, rewrite.

>  I am sorry I did not check enough your patch.
>  You are speaking of: arch/i386/boot/compressed/head.S

That is what you quoted, so I assumed that is what you were
talking about.

Quote:>  I am speaking of:    arch/i386/kernel/head.S

>  Gujin skip completely arch/i386/boot/compressed/* and really
>  boots the file '$$tmppiggy.gz' line 44 of file:
> arch/i386/boot/compressed/Makefile

>  So you can do whatever you want with the "first" 32 bits entry point,
>  I am just concerned by the "second" kernel 32 bits entry point, in
>  arch/i386/kernel/head.S

>  I still have a problem to detect the size of your decompressor, and that
>  is my use of the "lss" instruction.
>  This "lss SYMBOL_NAME(stack_start),%esp" gives an access to the symbol
>  'stack_start', so it is quite easy to find back the GZIP signature
>  of the initial '$$tmppiggy.gz' in what I call my "compatibility" mode,
>  i.e. booting the legacy vmlinuz files - and skipping all of the real mode
>  code and the decompressor code.

Well, it should be easier now: I put an explicit pointer to it.

Quote:>  This "lss" line has not always been at the same offset, but is around
>  since maybe even the 0.01 kernel, it is quite easy to find it from its
>  hexadecimal form. (function vmlinuz_header_treat() in vmlinuz.c of
>  Gujin).

>  The loaded high/loaded low stuff is just to know if I have to remove
>  0x100000 or 0x1000 from this symbol to have the number of bytes
>  to skip on the file.
>  By the way, the bit in the kernel header is set by the bootloader to say
>  where it has loaded the kernel, not by the compiler/linker chain.

Nope.  LOADED_HIGH in loadflags is set at compile time.  It determines
where the bootloader must load the compressed part of the kernel.

Quote:>  So is it possible to write somewhere how much code to skip or the offset
>  of the kernel GZIP signature?

Already done.

Quote:>  Something like:
>   jmp next
>   lss SYMBOL_NAME(stack_start),%esp
> next:
>  Would make me really happy, but is dirty.
>  Changing the 'tmppiggy.lnk' in the Makefile can be done, but the value
>  (to know the length of the decompressor code) has to be _before_ the code
>  itself in the raw file.

Yep.

Quote:>  Else whatever signature at whatever fixed address with the code+rodata
>  size following would make me happy.

Check out the code.

Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/