Scalable statistics counters using kmalloc_percpu

Scalable statistics counters using kmalloc_percpu

Post by Ravikiran G Thirumalai » Sun, 28 Jul 2002 00:20:06



Here is a Scalable statistics counter implementation which works on top
of the kmalloc_percpu dynamic allocator published by Dipankar.
This patch is against 2.5.27.

Description:
The following patch provides easy to use interfaces to replace
kernel counters where read accuracy is not that important and write is
frequent, for better cache characteristics.  The patch also exports
the kernel counters to user apps via /proc.
Major benefits can be achieved when these statistics counters are used
in place of atomic_t counters, as you avoid expensive locked instructions.

This patch provides the following interfaces :

1. statctr_init(statctr_t *ctr, unsigned long initval,
        struct proc_dir_entry *parent, const char *procname);
   Allocates memory to the counter and initialises it;
2. statctr_cleanup(statctr_t *);
   Cleans up allocated counter
3. statctr_inc(statctr_t *);
   Increments the counter
4. statctr_dec(statctr_t *);
   Decrements the counter
5. statctr_add
.
.

For more details visit
http://lse.sf.net/counters

Rik, You were interested in using this.  Does this implementation suit
your needs?

Comments most welcome

Thanks,
Kiran

diff -ruN -X dontdiff linux-2.5.25/fs/proc/root.c statctr-2.5.25/fs/proc/root.c
--- linux-2.5.25/fs/proc/root.c Sat Jul  6 05:12:33 2002
+++ statctr-2.5.25/fs/proc/root.c       Sun Jul 14 14:24:47 2002
@@ -19,6 +19,7 @@
 #include <linux/smp_lock.h>

 struct proc_dir_entry *proc_net, *proc_bus, *proc_root_fs, *proc_root_driver;
+struct proc_dir_entry *proc_stats;

 #ifdef CONFIG_SYSCTL
 struct proc_dir_entry *proc_sys_root;
@@ -77,6 +78,7 @@
        proc_rtas_init();
 #endif
        proc_bus = proc_mkdir("bus", 0);
+       proc_stats = proc_mkdir("stats", 0);
 }

 static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)

diff -ruN -X dontdiff linux-2.5.25/include/linux/statctr.h statctr-2.5.25/include/linux/statctr.h
--- linux-2.5.25/include/linux/statctr.h        Thu Jan  1 05:30:00 1970
+++ statctr-2.5.25/include/linux/statctr.h      Sun Jul 14 14:24:47 2002
@@ -0,0 +1,186 @@
+/*
+ * Scalable Statistics Counters.
+ *
+ * Visit http://lse.sourceforge.net/counters for detailed explanation of
+ *  Scalable Statistic Counters
+ *
+ * Copyright (c) International Business Machines Corp., 2001
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Author:              Ravikiran Thirumalai <ki...@in.ibm.com>
+ *
+ * include/linux/statctr.h
+ *
+ */
+
+#if     !defined(_LINUX_STATCTR_H)
+#define _LINUX_STATCTR_H
+
+#if     defined(__KERNEL__)
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+
+#ifdef CONFIG_PROC_FS
+extern struct proc_dir_entry *proc_stats;
+#endif
+
+typedef struct {
+#ifdef CONFIG_SMP
+       unsigned long *ctr;
+#else
+       unsigned long ctr;
+#endif
+#ifdef CONFIG_PROC_FS
+       struct proc_dir_entry *base;
+       char *name;
+#endif  /* CONFIG_PROC_FS */
+} statctr_t;
+
+/* prototypes */
+extern int statctr_init(statctr_t *, unsigned long,
+                       struct proc_dir_entry *, const char *);
+extern void statctr_cleanup(statctr_t *);
+extern int statctr_ninit(statctr_t *, unsigned long, int);
+extern void statctr_ncleanup(statctr_t *, int);
+
+#ifdef CONFIG_SMP
+
+static inline int __statctr_init(statctr_t *stctr)
+{
+       stctr->ctr = kmalloc_percpu(sizeof(*(stctr->ctr)), GFP_ATOMIC);
+       if(!stctr->ctr)
+               return -1;
+       return 0;
+}
+
+static inline void __statctr_cleanup(statctr_t *stctr)
+{
+       kfree_percpu(stctr->ctr);
+}
+
+#else  /* CONFIG_SMP */
+
+static inline int __statctr_init(statctr_t *stctr)
+{
+       return 0;
+}
+
+static inline void __statctr_cleanup(statctr_t *stctr) {}
+
+#endif /* CONFIG_SMP */
+
+/* inlines */
+#ifdef CONFIG_SMP
+/**
+ * statctr_inc - Increment the statistics counter by one.
+ * @stctr: Statistics counter
+ *
+ * Increments the counter by one.  Internally only the per-cpu counter is
+ * incremented.
+ */
+
+static inline void statctr_inc(statctr_t *stctr)
+{
+       (*per_cpu_ptr(stctr->ctr, smp_processor_id()))++;
+}
+
+/**
+ * statctr_dec - Decrement the statistics counter by one.
+ * @stctr: Statistics counter
+ *
+ * Decrements the counter by one.  Internally only the per-cpu counter is
+ * decremented.
+ */
+
+static inline void statctr_dec(statctr_t *stctr)
+{
+       (*per_cpu_ptr(stctr->ctr, smp_processor_id()))--;
+}
+
+/**
+ * statctr_set - Set the statistics counter to value passed.
+ * @stctr: Statistics counter
+ * @val: Value to be set.
+ *
+ * Sets the statistics counter. If statctr_read() is invoked after a counter
+ * is set, return value of statctr_read should reflect the value set.
+ */
+
+static inline void statctr_set(statctr_t *stctr, unsigned long val)
+{
+       int i;
+
+       for (i=0; i < NR_CPUS; i++) {
+               *per_cpu_ptr(stctr->ctr, i) = 0;
+       }
+       *this_cpu_ptr(stctr->ctr) = val;
+}
+
+/**
+ * statctr_read - Returns the counter value.
+ * @stctr: Statistics counter
+ *
+ * Reads all of the other per-cpu versions of this counter, consolidates them
+ * and returns to the caller.
+ */
+
+static inline long statctr_read(statctr_t *stctr)
+{
+       int i;
+       unsigned long res = 0;
+       for( i=0; i < NR_CPUS; i++ )
+               res += *per_cpu_ptr(stctr->ctr, i);
+       return res;
+}
+
+/**
+ * statctr_add - Adds the passed val to the counter value.
+ * @stctr: Statistics counter
+ * @val: Addend
+ *
+ */
+
+static inline void statctr_add(statctr_t *stctr, unsigned long val)
+{
+        *per_cpu_ptr(stctr->ctr, smp_processor_id()) += val;
+}
+
+/**
+ * statctr_sub - Subtracts the passed val from the counter value.
+ * @stctr: Statistics counter
+ * @val: Subtrahend
+ *
+ */
+
+static inline void statctr_sub(statctr_t *stctr, unsigned long val)
+{
+        *per_cpu_ptr(stctr->ctr, smp_processor_id()) -= val;
+}
+#else /* CONFIG_SMP */
+#define statctr_inc(stctr)     (((stctr)->ctr)++)
+#define statctr_dec(stctr) (((stctr)->ctr)--)
+#define statctr_read(stctr) ((stctr)->ctr)
+#define statctr_set(stctr,val) ((stctr)->ctr = (val))
+#define statctr_add(stctr,val) (((stctr)->ctr)+=(val))
+#define statctr_sub(stctr,val) (((stctr)->ctr)-=(val))
+#endif
+
+#endif  /* __KERNEL__ */
+
+#endif  /* _LINUX_STATCTR_H */
diff -ruN -X dontdiff linux-2.5.25/kernel/Makefile statctr-2.5.25/kernel/Makefile
--- linux-2.5.25/kernel/Makefile        Sat Jul  6 05:12:18 2002
+++ statctr-2.5.25/kernel/Makefile      Sun Jul 14 14:24:47 2002
@@ -10,12 +10,12 @@
 O_TARGET := kernel.o

 export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \
-               printk.o platform.o suspend.o
+               printk.o platform.o suspend.o statctr.o

 obj-y     = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
            module.o exit.o itimer.o time.o softirq.o resource.o \
            sysctl.o capability.o ptrace.o timer.o user.o \
-           signal.o sys.o kmod.o context.o futex.o platform.o
+           signal.o sys.o kmod.o context.o futex.o platform.o statctr.o

 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += ksyms.o
diff -ruN -X dontdiff linux-2.5.25/kernel/statctr.c statctr-2.5.25/kernel/statctr.c
--- linux-2.5.25/kernel/statctr.c       Thu Jan  1 05:30:00 1970
+++ statctr-2.5.25/kernel/statctr.c     Sun Jul 14 14:24:47 2002
@@ -0,0 +1,165 @@
+/*
+ * Scalable Statistics Counters.
+ *
+ * Visit http://lse.sourceforge.net/counters for detailed explanation of
+ *  Scalable Statistic Counters
+ *  
+ * Copyright (c) International Business Machines Corp., 2001
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Author:              Ravikiran Thirumalai <ki...@in.ibm.com>
+ *
+ * kernel/statctr.c
+ *
+ */
+
+#include <linux/statctr.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#ifdef CONFIG_PROC_FS
+static int proc_read_statctr(char *page, char **start,
+                            off_t off, int count, int *eof, void *data)
+{
+       int len;
+       statctr_t *stctr = (statctr_t *) data;
+       len = sprintf(page, "%ld\n", statctr_read(stctr));
+       return len;
+}
+
+static int statctr_proc_init(statctr_t *stctr, struct proc_dir_entry *procbase,
+                            const char *procname)
+{
+       struct proc_dir_entry *tmpbase, *tmp;
+
+       stctr->name = NULL;
+       stctr->base = NULL;
+       tmpbase = proc_stats;
+
+       if (procname != NULL) {
+               if(procbase != NULL)
+                       tmpbase = procbase;
+               tmp = create_proc_read_entry( procname, 0444, tmpbase,
+                       proc_read_statctr, stctr);
+               if (!tmp)
+                       return -1;
+               stctr->name = kmalloc(strlen(procname) + 1, GFP_ATOMIC);
+               if(!stctr->name) {
+                       remove_proc_entry(procname, tmpbase);
+                       return -1;
+               }
+               memcpy(stctr->name, procname, strlen(procname)+1);
+               stctr->base = tmpbase;
+       }
+       return 0;
+}
+
+static void statctr_proc_remove(statctr_t *stctr)
+{
+       if(stctr->name) {
+               remove_proc_entry(stctr->name, stctr->base);
+               kfree(stctr->name);
+       }
+}
+ ...

read more »

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by Rik van Riel » Sun, 28 Jul 2002 00:30:22



> Rik, You were interested in using this.  Does this implementation suit
> your needs?

From a quick glance it looks like it will.

However, it might be more efficient to put the statistics
in one file in /proc with named fields, or have a way to
group them in one or multiple files.

Not sure about that, though ... really depends on how
expensive stat+open+read+close is compared to parsing a
file with multiple fields.

regards,

Rik
--
Bravely reimplemented by the knights who say "NIH".

http://www.surriel.com/             http://distro.conectiva.com/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by Dipankar Sarma » Sun, 28 Jul 2002 01:00:12




> > Rik, You were interested in using this.  Does this implementation suit
> > your needs?

> > From a quick glance it looks like it will.

> However, it might be more efficient to put the statistics
> in one file in /proc with named fields, or have a way to
> group them in one or multiple files.

> Not sure about that, though ... really depends on how
> expensive stat+open+read+close is compared to parsing a
> file with multiple fields.

Hi Rik,

It seems that either way it might not have the scalability
required for system monitoring software that needs faster
access. One of the possibilities is to see if they can be
mapped to user space, but that requires significant change
in the percpu allocator. Does this seem like a logical next
step for exploration to you?

Thanks
--

Linux Technology Center, IBM Software Lab, Bangalore, India.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by Andrew Morton » Sun, 28 Jul 2002 04:00:10



> Here is a Scalable statistics counter implementation which works on top
> of the kmalloc_percpu dynamic allocator published by Dipankar.
> This patch is against 2.5.27.

> ...
> +static inline int __statctr_init(statctr_t *stctr)
> +{
> +       stctr->ctr = kmalloc_percpu(sizeof(*(stctr->ctr)), GFP_ATOMIC);
> +       if(!stctr->ctr)
> +               return -1;
> +       return 0;
> +}

Minor nit: please force the caller to pass in the gfp_flags when
designing an API like this.  The fact that you were forced to use
GFP_ATOMIC here shows why...

Quote:> +       for( i=0; i < NR_CPUS; i++ )
> +               res += *per_cpu_ptr(stctr->ctr, i);
> +       return res;
> +}

Oh dear.  Most people only have two CPUs.

Rusty, can we *please* fix this?  Really soon?

General comment:  we need to clean up the kernel_stat stuff.  We
cannot just make it per-cpu because it is 32k in size already.  I
would suggest that we should break out the disk accounting and
make the rest of kernel_stat per CPU.

That would be a great application of your interface, and a good
way to get your interface merged ;)  Is that something which you
have time to do?

Thanks.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by William Lee Irwin III » Sun, 28 Jul 2002 04:50:11



> Oh dear.  Most people only have two CPUs.
> Rusty, can we *please* fix this?  Really soon?

I'll post the panic triggered by lowering NR_CPUS shortly. There's
an ugly showstopping i386 arch code issue here.

Cheers,
Bill
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by Robert Love » Sun, 28 Jul 2002 05:00:09




> > Oh dear.  Most people only have two CPUs.
> > Rusty, can we *please* fix this?  Really soon?

> I'll post the panic triggered by lowering NR_CPUS shortly. There's
> an ugly showstopping i386 arch code issue here.

In current 2.5?  I thought Andrew and I fixed all those issues and
pushed them to Linus...

The `configurable NR_CPUS' patch works fine for me.  I always boot with
NR_CPUS=2.

        Robert Love

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by William Lee Irwin III » Sun, 28 Jul 2002 05:00:15



>>> Oh dear.  Most people only have two CPUs.
>>> Rusty, can we *please* fix this?  Really soon?


>> I'll post the panic triggered by lowering NR_CPUS shortly. There's
>> an ugly showstopping i386 arch code issue here.

> In current 2.5?  I thought Andrew and I fixed all those issues and
> pushed them to Linus...
> The `configurable NR_CPUS' patch works fine for me.  I always boot with
> NR_CPUS=2.

Sorry I didn't get a chance to test in time, these things are slow to
boot and my testing bandwidth is limited. Please hold off until the
issue is resolved. You *will* prevent me from booting. IO-APIC APIC ID
reassignment panics. I'll follow up after this when the thing comes up
into the kernel exhibiting the problem.

Cheers,
Bill
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by William Lee Irwin III » Sun, 28 Jul 2002 05:30:09


On Fri, Jul 26, 2002 at 11:46:34AM -0700, Andrew Morton wrote:
>>> Oh dear.  Most people only have two CPUs.
>>> Rusty, can we *please* fix this?  Really soon?

On Fri, 2002-07-26 at 12:46, William Lee Irwin III wrote:

>> I'll post the panic triggered by lowering NR_CPUS shortly. There's
>> an ugly showstopping i386 arch code issue here.
On Fri, Jul 26, 2002 at 12:50:12PM -0700, Robert Love wrote:
> In current 2.5?  I thought Andrew and I fixed all those issues and
> pushed them to Linus...
> The `configurable NR_CPUS' patch works fine for me.  I always boot with
> NR_CPUS=2.

No idea who it works for, it sure doesn't work here. Behold:

Script started on Fri Jul 26 12:55:23 2002
Linux version 2.5.28-akpm-1 (wli@megeira) (gcc version 2.95.4 20011002 (Debian prerelease)) #1 SMP Fri Jul 26 09:05:07 PDT 2002
Video mode to be used for restore is ffff
BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
 BIOS-e820: 0000000000100000 - 00000000e0000000 (usable)
 BIOS-e820: 00000000fec00000 - 00000000fec09000 (reserved)
 BIOS-e820: 00000000ffe80000 - 0000000100000000 (reserved)
 BIOS-e820: 0000000100000000 - 0000000400000000 (usable)
user-defined physical RAM map:
 user: 0000000000000000 - 000000000009fc00 (usable)
 user: 0000000000100000 - 00000000e0000000 (usable)
 user: 00000000fec00000 - 00000000fec09000 (reserved)
 user: 00000000ffe80000 - 0000000100000000 (reserved)
 user: 0000000100000000 - 0000000400000000 (usable)
15488MB HIGHMEM available.
896MB LOWMEM available.
found SMP MP-table at 000f6040
hm, page 000f6000 reserved twice.
hm, page 000f7000 reserved twice.
On node 0 totalpages: 4194304
zone(0): 4096 pages.
zone(1): 225280 pages.
zone(2): 3964928 pages.
Intel MultiProcessor Specification v1.4
    Virtual Wire compatibility mode.
OEM ID: IBM NUMA Product ID: SBB          APIC at: 0xFEC08000
Found an OEM MPC table at   7009c8 - parsing it ...
Translation: record 0, type 1, quad 0, global 3, local 3
Translation: record 1, type 1, quad 0, global 1, local 1
Translation: record 2, type 1, quad 0, global 1, local 1
Translation: record 3, type 1, quad 0, global 1, local 1
Translation: record 4, type 1, quad 1, global 1, local 3
Translation: record 5, type 1, quad 1, global 1, local 1
Translation: record 6, type 1, quad 1, global 1, local 1
Translation: record 7, type 1, quad 1, global 1, local 1
Translation: record 8, type 1, quad 2, global 1, local 3
Translation: record 9, type 1, quad 2, global 1, local 1
Translation: record 10, type 1, quad 2, global 1, local 1
Translation: record 11, type 1, quad 2, global 1, local 1
Translation: record 12, type 1, quad 3, global 1, local 3
Translation: record 13, type 1, quad 3, global 1, local 1
Translation: record 14, type 1, quad 3, global 1, local 1
Translation: record 15, type 1, quad 3, global 1, local 1
Translation: record 16, type 3, quad 0, global 0, local 0
Translation: record 17, type 3, quad 0, global 1, local 1
Translation: record 18, type 3, quad 0, global 2, local 2
Translation: record 19, type 4, quad 0, global 12, local 18
Translation: record 20, type 3, quad 1, global 3, local 0
Translation: record 21, type 3, quad 1, global 4, local 1
Translation: record 22, type 3, quad 1, global 5, local 2
Translation: record 23, type 4, quad 1, global 13, local 18
Translation: record 24, type 3, quad 2, global 6, local 0
Translation: record 25, type 3, quad 2, global 7, local 1
Translation: record 26, type 3, quad 2, global 8, local 2
Translation: record 27, type 4, quad 2, global 14, local 18
Translation: record 28, type 3, quad 3, global 9, local 0
Translation: record 29, type 3, quad 3, global 10, local 1
Translation: record 30, type 3, quad 3, global 11, local 2
Translation: record 31, type 4, quad 3, global 15, local 18
Translation: record 32, type 2, quad 0, global 13, local 14
Translation: record 33, type 2, quad 0, global 14, local 13
Translation: record 34, type 2, quad 1, global 15, local 14
Translation: record 35, type 2, quad 1, global 16, local 13
Translation: record 36, type 2, quad 2, global 17, local 14
Translation: record 37, type 2, quad 2, global 18, local 13
Translation: record 38, type 2, quad 3, global 19, local 14
Translation: record 39, type 2, quad 3, global 20, local 13
Processor #0 6:10 APIC version 17 (quad 0, apic 1)
Processor #4 6:10 APIC version 17 (quad 0, apic 8)
Processor #1 6:10 APIC version 17 (quad 0, apic 2)
Processor #2 6:10 APIC version 17 (quad 0, apic 4)
Processor #0 6:10 APIC version 17 (quad 1, apic 17)
Processor #4 6:10 APIC version 17 (quad 1, apic 24)
Processor #1 6:10 APIC version 17 (quad 1, apic 18)
Processor #2 6:10 APIC version 17 (quad 1, apic 20)
Processor #0 6:10 APIC version 17 (quad 2, apic 33)
Processor #4 6:10 APIC version 17 (quad 2, apic 40)
Processor #1 6:10 APIC version 17 (quad 2, apic 34)
Processor #2 6:10 APIC version 17 (quad 2, apic 36)
Processor #0 6:10 APIC version 17 (quad 3, apic 49)
Processor #4 6:10 APIC version 17 (quad 3, apic 56)
Processor #1 6:10 APIC version 17 (quad 3, apic 50)
Processor #2 6:10 APIC version 17 (quad 3, apic 52)
Bus #0 is PCI    (node 0)
Bus #1 is PCI    (node 0)
Bus #2 is PCI    (node 0)
Bus #12 is EISA   (node 0)
Bus #3 is PCI    (node 1)
Bus #4 is PCI    (node 1)
Bus #5 is PCI    (node 1)
Bus #13 is EISA   (node 1)
Bus #6 is PCI    (node 2)
Bus #7 is PCI    (node 2)
Bus #8 is PCI    (node 2)
Bus #14 is EISA   (node 2)
Bus #9 is PCI    (node 3)
Bus #10 is PCI    (node 3)
Bus #11 is PCI    (node 3)
Bus #15 is EISA   (node 3)
I/O APIC #13 Version 17 at 0xFE800000.
I/O APIC #14 Version 17 at 0xFE801000.
I/O APIC #15 Version 17 at 0xFE840000.
I/O APIC #16 Version 17 at 0xFE841000.
I/O APIC #17 Version 17 at 0xFE880000.
I/O APIC #18 Version 17 at 0xFE881000.
I/O APIC #19 Version 17 at 0xFE8C0000.
I/O APIC #20 Version 17 at 0xFE8C1000.
Processors: 16
Kernel command line: root=/dev/sda2 console=ttyS0,38400n8 mem=16777216K
Initializing CPU#0
Loading GDT/IDT for CPU#0
Loaded per-cpu LDT/TSS for CPU#0
Cleaned up FPU and debug regs for CPU#0
Detected 700.274 MHz processor.
Console: colour VGA+ 80x25
Calibrating delay loop... 1380.35 BogoMIPS
Memory: 16069096k/16777216k available (1370k kernel code, 183444k reserved, 530k data, 260k init, 15335424k highmem)
Security Scaffold v1.0.0 initialized
Dentry-cache hash table entries: 262144 (order: 9, 2097152 bytes)
Inode-cache hash table entries: 262144 (order: 9, 2097152 bytes)
Mount-cache hash table entries: 512 (order: 0, 4096 bytes)
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 2048K
CPU serial number disabled.
Enabling fast FPU save and restore... done.
Enabling unmasked SIMD FPU exception support... done.
Checking 'hlt' instruction... OK.
POSIX conformance testing by UNIFIX
Remapping cross-quad port I/O for 4 quads
xquad_portio vaddr 0x00000000, len 00200000
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 2048K
CPU0: Intel 00/0a stepping 04
per-CPU timeslice cutoff: 5846.34 usecs.
task migration cache decay timeout: 6 msecs.
enabled ExtINT on CPU#0
Leaving ESR disabled.
Booting processor 1/2 eip 2000
Initializing CPU#1
Loading GDT/IDT for CPU#1
Loaded per-cpu LDT/TSS for CPU#1
Cleaned up FPU and debug regs for CPU#1
masked ExtINT on CPU#1
Leaving ESR disabled.
Calibrating delay loop... 1396.73 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 2048K
CPU serial number disabled.
CPU1: Intel 00/0a stepping 04
Restoring NMI vector
Booting processor 2/4 eip 2000
Initializing CPU#2
Loading GDT/IDT for CPU#2
Loaded per-cpu LDT/TSS for CPU#2
Cleaned up FPU and debug regs for CPU#2
masked ExtINT on CPU#2
Leaving ESR disabled.
Calibrating delay loop... 1396.73 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 2048K
CPU serial number disabled.
CPU2: Intel 00/0a stepping 00
Restoring NMI vector
Booting processor 3/8 eip 2000
Initializing CPU#3
Loading GDT/IDT for CPU#3
Loaded per-cpu LDT/TSS for CPU#3
Cleaned up FPU and debug regs for CPU#3
masked ExtINT on CPU#3
Leaving ESR disabled.
Calibrating delay loop... 1396.73 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 2048K
CPU serial number disabled.
CPU3: Intel 00/0a stepping 04
Restoring NMI vector
Booting processor 4/17 eip 2000
Initializing CPU#4
Loading GDT/IDT for CPU#4
Loaded per-cpu LDT/TSS for CPU#4
Cleaned up FPU and debug regs for CPU#4
masked ExtINT on CPU#4
Leaving ESR disabled.
Calibrating delay loop... 1392.64 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 1024K
CPU serial number disabled.
CPU4: Intel 00/0a stepping 01
Restoring NMI vector
Booting processor 5/18 eip 2000
Initializing CPU#5
Loading GDT/IDT for CPU#5
Loaded per-cpu LDT/TSS for CPU#5
Cleaned up FPU and debug regs for CPU#5
masked ExtINT on CPU#5
Leaving ESR disabled.
Calibrating delay loop... 1392.64 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 1024K
CPU serial number disabled.
CPU5: Intel 00/0a stepping 01
Restoring NMI vector
Booting processor 6/20 eip 2000
Initializing CPU#6
Loading GDT/IDT for CPU#6
Loaded per-cpu LDT/TSS for CPU#6
Cleaned up FPU and debug regs for CPU#6
masked ExtINT on CPU#6
Leaving ESR disabled.
Calibrating delay loop... 1388.54 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 1024K
CPU serial number disabled.
CPU6: Intel 00/0a stepping 01
Restoring NMI vector
Booting processor 7/24 eip 2000
Initializing CPU#7
Loading GDT/IDT for CPU#7
Loaded per-cpu LDT/TSS for CPU#7
Cleaned up FPU and debug regs for CPU#7
masked ExtINT on CPU#7
Leaving ESR disabled.
Calibrating delay loop... 1392.64 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 1024K
CPU serial number disabled.
CPU7: Intel 00/0a stepping 01
Restoring NMI vector
Booting processor 8/33 eip 2000
Initializing CPU#8
Loading GDT/IDT for CPU#8
Loaded per-cpu LDT/TSS for CPU#8
Cleaned up FPU and debug regs for CPU#8
masked ExtINT on CPU#8
Leaving ESR disabled.
Calibrating delay loop... 1392.64 BogoMIPS
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 1024K
CPU serial number disabled.
CPU8: Intel 00/0a ...

read more »

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by Robert Love » Sun, 28 Jul 2002 05:30:15




> > In current 2.5?  I thought Andrew and I fixed all those issues and
> > pushed them to Linus...
> > The `configurable NR_CPUS' patch works fine for me.  I always boot with
> > NR_CPUS=2.

> No idea who it works for, it sure doesn't work here. Behold:

Hmm, is your CPU-space sparse?

If that is the case, and the max APIC ID is set to NR_CPUS, and the
kernel expects a 1:1 between NR_CPU value and logical CPU #... boom.

        Robert Love

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by Rusty Russell » Sun, 28 Jul 2002 11:10:07


Quote:> > +       for( i=0; i < NR_CPUS; i++ )
> > +               res += *per_cpu_ptr(stctr->ctr, i);
> > +       return res;
> > +}

> Oh dear.  Most people only have two CPUs.

> Rusty, can we *please* fix this?  Really soon?

Linus just applied the hotplug cpu boot patch in bk, which gives
cpu_possible(i), for exactly this purpose.

Quote:> General comment:  we need to clean up the kernel_stat stuff.  We
> cannot just make it per-cpu because it is 32k in size already.  I
> would suggest that we should break out the disk accounting and
> make the rest of kernel_stat per CPU.

kernel_stat is dynamically allocated???

Personally, I think that dynamically allocated per-cpu datastructures,
like dynamically-allocated brlocks, are something we might need
eventually, but look at what a certain driver did with the "make it
per-cpu" concept already.  I don't want to rush in that direction.

Cheers,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

Scalable statistics counters using kmalloc_percpu

Post by Andrew Morton » Sun, 28 Jul 2002 13:40:07



> > > +       for( i=0; i < NR_CPUS; i++ )
> > > +               res += *per_cpu_ptr(stctr->ctr, i);
> > > +       return res;
> > > +}

> > Oh dear.  Most people only have two CPUs.

> > Rusty, can we *please* fix this?  Really soon?

> Linus just applied the hotplug cpu boot patch in bk, which gives
> cpu_possible(i), for exactly this purpose.

Good.  And will it be possible to iterate across all CPUs
without having to iterate across NR_CPUS?

Quote:> > General comment:  we need to clean up the kernel_stat stuff.  We
> > cannot just make it per-cpu because it is 32k in size already.  I
> > would suggest that we should break out the disk accounting and
> > make the rest of kernel_stat per CPU.

> kernel_stat is dynamically allocated???

No.  It's just a big lump of bss.

Quote:> Personally, I think that dynamically allocated per-cpu datastructures,
> like dynamically-allocated brlocks, are something we might need
> eventually, but look at what a certain driver did with the "make it
> per-cpu" concept already.  I don't want to rush in that direction.

What driver is that?

And no, we need to do something about the NR_CPUS bloat Right Now.

In my build there is a quarter megabyte of per cpu data.  And that
does not include the (currently small) .data.percpu * 32.

This is pretty much entirely wasted memory, and it will only get
worse. Making NR_CPUS compile-time configurable is a lame solution.
Wasting the memory is out of the question.

Dynamic allocation is the only thing left, yes?

-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/