In the archive
sunsite.unc.edu/pub/Linux/Kernel/patches/misc/lx10inline_v2.tgz
I found inline-assembler versions of library functions (memcpy etc.) for Linux on
i486 CPUs. Including these functions (written for kernel version 1.0.9)
in kernel 1.2.5 really increases the kernel speed on my machine.
Note: I am _NOT_ the author of the assembler functions. For more
information, please read the README by the authors, Alberto Vignani and
Davide Parodi, in the original archive lx10inline_v2.tgz!
The enclosed patch modifies the following files:
linux/arch/i386/config.in
linux/lib/string.c
linux/include/asm-i386/string.h
It was made against kernel 1.2.6+.
After patching, run `make config'
and enable both the 486-specific optimizations _and_ the assembler-optimized
library.
Werner
PS: To do: include more functions such as bcopy;
further optimize the C-code part of memcpy
and memset in linux/include/asm-i386/string.h
(like the i386 versions written by Linus).
------------------------------------------------------------------------
diff -u5 linux/arch/i386/config.in.oldd linux/arch/i386/config.in
--- linux/arch/i386/config.in.oldd Sun Apr 30 21:25:11 1995
+++ linux/arch/i386/config.in Sun Apr 30 21:23:29 1995
@@ -33,10 +33,14 @@
#bool 'Use -mpentium flag for Pentium-specific optimizations' CONFIG_M586 n
#if [ "$CONFIG_M586" = "n" ]; then
bool 'Use -m486 flag for 486-specific optimizations' CONFIG_M486 y
#fi
+comment 'Using of assembler optimized library (experimental)'
+comment 'combined with 486-specific optimizations only on 486 or 586'
+bool 'Assembler optimized library' CONFIG_ASM_STRING_H y
+
comment 'Loadable module support'
bool 'Set version information on all symbols for modules' CONFIG_MODVERSIONS n
if [ "$CONFIG_NET" = "y" ]; then
comment 'Networking options'
diff -u5 linux/lib/string.c.oldd linux/lib/string.c
--- linux/lib/string.c.oldd Wed Apr 19 11:13:35 1995
+++ linux/lib/string.c Wed Apr 19 11:54:19 1995
@@ -9,10 +9,18 @@
* as inline code in <asm-xx/string.h>
*
* These are buggy as well..
*/
+#if defined(CONFIG_ASM_STRING_H)
+
+#include <linux/string.h>
+
+char * ___strtok = NULL;
+
+#else /* CONFIG_ASM_STRING_H */
+
#include <linux/types.h>
char * ___strtok = NULL;
char * strcpy(char * dest,const char *src)
@@ -239,5 +247,7 @@
p++;
size--;
}
return (void *) p;
}
+
+#endif /* CONFIG_ASM_STRING_H */
diff -u5 linux/include/asm-i386/string.h.oldd linux/include/asm-i386/string.h
--- linux/include/asm-i386/string.h.oldd Mon Jan 9 05:33:23 1995
+++ linux/include/asm-i386/string.h Tue May 2 12:02:21 1995
@@ -8,28 +8,72 @@
* see especially strtok,strstr,str[c]spn. They should work, but are not
* very easy to understand. Everything is done entirely within the register
* set, making the functions fast and clean. String instructions have been
* used through-out, making for "slightly" unclear code :-)
*
- * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1991,1992,1993,1994 Linus Torvalds
+ * Revised and optimized for i486/pentium
+ * 1994/03/15 by Alberto Vignani/Davide Parodi @crf.it
*/
extern inline char * strcpy(char * dest,const char *src)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* copy src to dest, NUL included, with a plain mov/inc byte loop */
+ register char *tmp= (char *)dest; /* write cursor; dest itself is kept for the return value */
+ register char dummy; /* byte in flight; "q" asks for an a/b/c/d register */
+ __asm__ __volatile__
+ ("\n1:\t"
+ "movb (%0),%2\n\t" /* dummy = *src */
+ "incl %0\n\t" /* src++ */
+ "movb %2,(%1)\n\t" /* *tmp = dummy */
+ "incl %1\n\t" /* tmp++ */
+ "testb %2,%2\n\t" /* was the byte just stored the NUL? */
+ "jne 1b" /* no: copy the next byte */
+ :"=r" (src), "=r" (tmp), "=q" (dummy)
+ :"0" (src), "1" (tmp) /* src and tmp are tied in/out operands: the asm advances both */
+ :"memory");
+ return dest;
+#else /* CONFIG_M486 || CONFIG_M586 */
__asm__ __volatile__(
"cld\n"
"1:\tlodsb\n\t"
"stosb\n\t"
"testb %%al,%%al\n\t"
"jne 1b"
: /* no output */
:"S" (src),"D" (dest):"si","di","ax","memory");
return dest;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline char * strncpy(char * dest,const char *src,size_t count)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* copy up to count bytes of src into dest with a plain mov/inc byte loop */
+ register char *tmp= (char *)dest; /* write cursor; dest itself is kept for the return value */
+ register char dummy; /* byte in flight; "q" asks for an a/b/c/d register */
+ if (count) { /* count == 0: the asm is skipped and nothing is written */
+ __asm__ __volatile__ (
+ "\n1:\t"
+ "movb (%0),%2\n\t" /* dummy = *src */
+ "incl %0\n\t" /* src++ */
+ "movb %2,(%1)\n\t" /* *tmp = dummy */
+ "incl %1\n\t" /* tmp++ */
+ "decl %3\n\t" /* count-- */
+ "je 3f\n\t" /* count used up: finish without storing anything further */
+ "testb %2,%2\n\t" /* was the byte just copied the NUL? */
+ "jne 1b\n\t" /* no: keep copying */
+ "2:\tmovb %2,(%1)\n\t" /* src ended early: dummy is 0 here, so this stores NUL padding */
+ "incl %1\n\t" /* tmp++ */
+ "decl %3\n\t" /* count-- */
+ "jne 2b\n\t" /* pad until count reaches 0 */
+ "3:"
+ :"=r" (src), "=r" (tmp), "=q" (dummy), "=r" (count)
+ :"0" (src), "1" (tmp), "3" (count) /* tied in/out operands: the asm modifies src, tmp and count */
+ :"memory");
+ }
+ return dest;
+#else /* CONFIG_M486 || CONFIG_M586 */
__asm__ __volatile__(
"cld\n"
"1:\tdecl %2\n\t"
"js 2f\n\t"
"lodsb\n\t"
@@ -40,14 +84,33 @@
"stosb\n"
"2:"
: /* no output */
:"S" (src),"D" (dest),"c" (count):"si","di","ax","cx","memory");
return dest;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline char * strcat(char * dest,const char * src)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* find the end of dest, then copy src (NUL included) there with a mov/inc loop */
+ register char *tmp = (char *)(dest-1); /* starts one below dest because the scan loop increments before it tests */
+ register char dummy; /* byte in flight; "q" asks for an a/b/c/d register */
+ __asm__ __volatile__
+ ("\n1:\tincl %1\n\t" /* tmp++ */
+ "cmpb $0,(%1)\n\t" /* is *tmp the NUL that ends dest? */
+ "jne 1b\n" /* no: keep scanning */
+ "2:\tmovb (%2),%b0\n\t" /* dummy = *src */
+ "incl %2\n\t" /* src++ */
+ "movb %b0,(%1)\n\t" /* *tmp = dummy; the first store overwrites dest's NUL */
+ "incl %1\n\t" /* tmp++ */
+ "testb %b0,%b0\n\t" /* was the byte just copied the NUL? */
+ "jne 2b\n" /* no: copy the next byte */
+ :"=q" (dummy), "=r" (tmp), "=r" (src)
+ :"1" (tmp), "2" (src) /* tmp and src are tied in/out operands: the asm advances both */
+ :"memory");
+ return dest;
+#else /* CONFIG_M486 || CONFIG_M586 */
__asm__ __volatile__(
"cld\n\t"
"repne\n\t"
"scasb\n\t"
"decl %1\n"
@@ -56,14 +119,37 @@
"testb %%al,%%al\n\t"
"jne 1b"
: /* no output */
:"S" (src),"D" (dest),"a" (0),"c" (0xffffffff):"si","di","ax","cx");
return dest;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline char * strncat(char * dest,const char * src,size_t count)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* find the end of dest, append up to count bytes of src, then store a NUL */
+ register char *tmp = (char *)(dest-1); /* starts one below dest because the scan loop increments before it tests */
+ register char dummy; /* byte in flight; "q" asks for an a/b/c/d register */
+ __asm__ __volatile__
+ ("\n1:\tincl %1\n\t" /* tmp++ */
+ "cmpb $0,(%1)\n\t" /* is *tmp the NUL that ends dest? */
+ "jne 1b\n" /* no: keep scanning */
+ "2:\tdecl %3\n\t" /* count-- */
+ "js 3f\n\t" /* count went negative: limit reached, go terminate */
+ "movb (%2),%b0\n\t" /* dummy = *src */
+ "incl %2\n\t" /* src++ */
+ "movb %b0,(%1)\n\t" /* *tmp = dummy */
+ "incl %1\n\t" /* tmp++ */
+ "testb %b0,%b0\n\t" /* was the byte just copied the NUL? */
+ "jne 2b\n" /* no: copy the next byte */
+ "3:\txorl %0,%0\n\t" /* zero the whole register that holds dummy */
+ "movb %b0,(%1)\n\t" /* *tmp = NUL. NOTE(review): also reached by fall-through after src's NUL was copied, so a second NUL lands one byte further on - confirm callers size dest for that */
+ :"=q" (dummy), "=r" (tmp), "=r" (src), "=r" (count)
+ :"1" (tmp), "2" (src), "3" (count) /* tied in/out operands: the asm modifies tmp, src and count */
+ :"memory");
+ return dest;
+#else /* CONFIG_M486 || CONFIG_M586 */
__asm__ __volatile__(
"cld\n\t"
"repne\n\t"
"scasb\n\t"
"decl %1\n\t"
@@ -78,14 +164,36 @@
"stosb"
: /* no output */
:"S" (src),"D" (dest),"a" (0),"c" (0xffffffff),"g" (count)
:"si","di","ax","cx","memory");
return dest;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline int strcmp(const char * cs,const char * ct)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* byte-wise compare of cs and ct; __res ends up 0, 1 or -1 */
+ register int __res; /* low byte doubles as the scratch register for the current cs byte */
+ __asm__ __volatile__
+ ("\n1:\tmovb (%1),%b0\n\t" /* load *cs */
+ "incl %1\n\t" /* cs++ */
+ "cmpb %b0,(%2)\n\t" /* compare *ct against the cs byte */
+ "jne 2f\n\t" /* bytes differ: go work out the sign */
+ "incl %2\n\t" /* ct++ */
+ "testb %b0,%b0\n\t" /* did both strings end on this byte? */
+ "jne 1b\n\t" /* no: compare the next pair */
+ "xorl %0,%0\n\t" /* equal strings: result 0 */
+ "jmp 3f\n"
+ "2:\tmovl $1,%0\n\t" /* assume result 1 */
+ "jb 3f\n\t" /* keep 1 when *ct was below *cs in the cmpb above (unsigned byte compare) */
+ "negl %0\n" /* otherwise the result is -1 */
+ "3:"
+ :"=q" (__res), "=r" (cs), "=r" (ct)
+ :"1" (cs), "2" (ct) /* cs and ct are tied in/out operands: the asm advances both */
+ : "memory" );
+ return __res;
+#else /* CONFIG_M486 || CONFIG_M586 */
register int __res;
__asm__ __volatile__(
"cld\n"
"1:\tlodsb\n\t"
"scasb\n\t"
@@ -97,14 +205,37 @@
"2:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%eax\n"
"3:"
:"=a" (__res):"S" (cs),"D" (ct):"si","di");
return __res;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline int strncmp(const char * cs,const char * ct,size_t count)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* byte-wise compare of at most count bytes; __res ends up 0, 1 or -1 */
+ register int __res; /* low byte doubles as the scratch register for the current cs byte */
+ __asm__ __volatile__
+ ("\n1:\tdecl %3\n\t" /* count-- */
+ "js 2f\n\t" /* count went negative: no difference within the limit, result 0 */
+ "movb (%1),%b0\n\t" /* load *cs */
+ "incl %1\n\t" /* cs++ */
+ "cmpb %b0,(%2)\n\t" /* compare *ct against the cs byte */
+ "jne 3f\n\t" /* bytes differ: go work out the sign */
+ "incl %2\n\t" /* ct++ */
+ "testb %b0,%b0\n\t" /* did both strings end on this byte? */
+ "jne 1b\n" /* no: compare the next pair */
+ "2:\txorl %0,%0\n\t" /* equal: result 0 */
+ "jmp 4f\n"
+ "3:\tmovl $1,%0\n\t" /* assume result 1 */
+ "jb 4f\n\t" /* keep 1 when *ct was below *cs in the cmpb above (unsigned byte compare) */
+ "negl %0\n" /* otherwise the result is -1 */
+ "4:"
+ :"=q" (__res), "=r" (cs), "=r" (ct), "=r" (count)
+ :"1" (cs), "2" (ct), "3" (count)); /* tied in/out operands: the asm modifies cs, ct and count */
+ return __res;
+#else /* CONFIG_M486 || CONFIG_M586 */
register int __res;
__asm__ __volatile__(
"cld\n"
"1:\tdecl %3\n\t"
"js 2f\n\t"
@@ -118,14 +249,31 @@
"3:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%al\n"
"4:"
:"=a" (__res):"S" (cs),"D" (ct),"c" (count):"si","di","cx");
return __res;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline char * strchr(const char * s, int c)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* forward scan of s for the byte c; result is the match position or 0 */
+ register char * __res; /* lives in eax ("=a"), which enters the asm holding c */
+ __asm__ __volatile__
+ ("movb %%al,%%ah\n" /* park c in ah so al can hold the current byte */
+ "1:\tmovb (%1),%%al\n\t" /* al = *s */
+ "cmpb %%ah,%%al\n\t" /* is it the byte we are looking for? */
+ "je 2f\n\t" /* yes: s points at the match (also matches the NUL when c is 0) */
+ "incl %1\n\t" /* s++ */
+ "testb %%al,%%al\n\t" /* was that byte the terminating NUL? */
+ "jne 1b\n\t" /* no: keep scanning */
+ "xorl %1,%1\n" /* end of string without a match: s = 0 */
+ "2:\tmovl %1,%0\n\t" /* __res = s */
+ :"=a" (__res), "=r" (s)
+ :"0" (c), "1" (s)); /* c is passed in through eax; s is a tied in/out operand */
+ return __res;
+#else /* CONFIG_M486 || CONFIG_M586 */
register char * __res;
__asm__ __volatile__(
"cld\n\t"
"movb %%al,%%ah\n"
"1:\tlodsb\n\t"
@@ -136,14 +284,30 @@
"movl $1,%1\n"
"2:\tmovl %1,%0\n\t"
"decl %0"
:"=a" (__res):"S" (s),"0" (c):"si");
return __res;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline char * strrchr(const char * s, int c)
{
+#if defined(CONFIG_M486) || defined(CONFIG_M586) /* scan all of s, remembering the last position whose byte equals c; 0 if none */
+ register char * __res; /* starts as 0 (input "0" (0)); updated on every match */
+ __asm__ __volatile__
+ ("movb %b2,%h2\n" /* park c in the high byte so the low byte can hold the current char */
+ "1:\tmovb (%1),%b2\n\t" /* low byte = *s */
+ "cmpb %h2,%b2\n\t" /* is it the byte we are looking for? */
+ "jne 2f\n\t"
+ "movl %1,%0\n" /* yes: remember this position as the latest match */
+ "2:\tincl %1\n\t" /* s++ */
+ "testb %b2,%b2\n\t" /* end of string? Must test the char itself (as the legacy branch does); the old %b2,%h2 form ANDed the char with c and stopped early */
+ "jne 1b" /* not NUL yet: keep scanning */
+ :"=r" (__res), "=r" (s), "=q" (c)
+ :"0" (0), "1" (s), "2" (c)); /* c needs a "q" register so both %b2 and %h2 exist */
+ return __res;
+#else /* CONFIG_M486 || CONFIG_M586 */
register char * __res;
__asm__ __volatile__(
"cld\n\t"
"movb %%al,%%ah\n"
"1:\tlodsb\n\t"
@@ -152,10 +316,11 @@
"leal -1(%%esi),%0\n"
"2:\ttestb %%al,%%al\n\t"
"jne 1b"
:"=d" (__res):"0" (0),"S" (s),"a" (c):"ax","si");
return __res;
+#endif /* CONFIG_M486 || CONFIG_M586 */
}
extern inline size_t strspn(const char *
...
read more »