kmalloc_percpu

kmalloc_percpu

Post by Rusty Russel » Tue, 06 May 2003 10:20:07



Hi Andrew,

        This is the kmalloc_percpu patch.  I have another patch which
tests the allocator if you want to see that too.  This is the precursor
to per-cpu stuff in modules, but also allows other space gains for
structures which currently embed per-cpu arrays (eg. your fuzzy counters).

Cheers,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: kmalloc_percpu to use same percpu operators
Author: Rusty Russell
Status: Tested on 2.5.68-bk11

D: By overallocating the per-cpu data at boot, we can make quite an
D: efficient allocator, and then use it to support per-cpu data in
D: modules (next patch).

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .29880-linux-2.5.69/include/asm-generic/percpu.h .29880-linux-2.5.69.updated/include/asm-generic/percpu.h
--- .29880-linux-2.5.69/include/asm-generic/percpu.h    2003-01-02 12:32:47.000000000 +1100
+++ .29880-linux-2.5.69.updated/include/asm-generic/percpu.h    2003-05-05 17:36:25.000000000 +1000
@@ -2,37 +2,11 @@
 #define _ASM_GENERIC_PERCPU_H_
 #include <linux/compiler.h>

-#define __GENERIC_PER_CPU
+/* Some archs may want to keep __per_cpu_offset for this CPU in a register,
+   or do their own allocation. */
 #ifdef CONFIG_SMP
-
-extern unsigned long __per_cpu_offset[NR_CPUS];
-
-/* Separate out the type, so (int[3], foo) works. */
-#ifndef MODULE
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
-#endif
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
 #define __get_cpu_var(var) per_cpu(var, smp_processor_id())
-
-#else /* ! SMP */
-
-/* Can't define per-cpu variables in modules.  Sorry --RR */
-#ifndef MODULE
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) name##__per_cpu
-#endif
-
-#define per_cpu(var, cpu)                      ((void)cpu, var##__per_cpu)
-#define __get_cpu_var(var)                     var##__per_cpu
-
-#endif /* SMP */
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
-
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu)
-
+#define __get_cpu_ptr(var) per_cpu_ptr(ptr, smp_processor_id())
+#define __NEED_SETUP_PER_CPU_AREAS
+#endif /* SMP */
 #endif /* _ASM_GENERIC_PERCPU_H_ */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .29880-linux-2.5.69/include/linux/genhd.h .29880-linux-2.5.69.updated/include/linux/genhd.h
--- .29880-linux-2.5.69/include/linux/genhd.h   2003-05-05 12:37:12.000000000 +1000
+++ .29880-linux-2.5.69.updated/include/linux/genhd.h   2003-05-05 17:36:25.000000000 +1000
@@ -160,10 +160,9 @@ static inline void disk_stat_set_all(str
 #ifdef  CONFIG_SMP
 static inline int init_disk_stats(struct gendisk *disk)
 {
-       disk->dkstats = kmalloc_percpu(sizeof (struct disk_stats), GFP_KERNEL);
+       disk->dkstats = kmalloc_percpu(struct disk_stats);
        if (!disk->dkstats)
                return 0;
-       disk_stat_set_all(disk, 0);
        return 1;
 }

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .29880-linux-2.5.69/include/linux/percpu.h .29880-linux-2.5.69.updated/include/linux/percpu.h
--- .29880-linux-2.5.69/include/linux/percpu.h  2003-02-07 19:20:01.000000000 +1100
+++ .29880-linux-2.5.69.updated/include/linux/percpu.h  2003-05-05 17:36:25.000000000 +1000
@@ -1,71 +1,85 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
-#include <linux/spinlock.h> /* For preempt_disable() */
-#include <linux/slab.h> /* For kmalloc_percpu() */
+#include <linux/preempt.h> /* For preempt_disable() */
+#include <linux/slab.h> /* For kmalloc() */
+#include <linux/cache.h>
+#include <linux/string.h>
+#include <asm/bug.h>
 #include <asm/percpu.h>

-/* Must be an lvalue. */
+/* Total pool for percpu data (for each CPU). */
+#ifndef PERCPU_POOL_SIZE
+#define PERCPU_POOL_SIZE 32768
+#endif
+
+/* For variables declared with DECLARE_PER_CPU()/DEFINE_PER_CPU(). */
 #define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
 #define put_cpu_var(var) preempt_enable()
+/* Also, per_cpu(var, cpu) to get another cpu's value. */
+
+/* For ptrs allocated with kmalloc_percpu */
+#define get_cpu_ptr(ptr) ({ preempt_disable(); __get_cpu_ptr(ptr); })
+#define put_cpu_ptr(ptr) preempt_enable()
+/* Also, per_cpu_ptr(ptr, cpu) to get another cpu's value. */

 #ifdef CONFIG_SMP

-struct percpu_data {
-       void *ptrs[NR_CPUS];
-       void *blkp;
-};
+/* __alloc_percpu zeros memory for every cpu, as a convenience. */
+extern void *__alloc_percpu(size_t size, size_t align);
+extern void kfree_percpu(const void *);

-/*
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * kmalloc_percpu.  If you want to get "this cpu's version", maybe you want
- * to use get_cpu_ptr...
- */
-#define per_cpu_ptr(ptr, cpu)                   \
-({                                              \
-        struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
-        (__typeof__(ptr))__p->ptrs[(cpu)];  \
-})
+extern unsigned long __per_cpu_offset[NR_CPUS];

-extern void *kmalloc_percpu(size_t size, int flags);
-extern void kfree_percpu(const void *);
-extern void kmalloc_percpu_init(void);
+/* Separate out the type, so (int[3], foo) works. */
+#ifndef MODULE
+#define DEFINE_PER_CPU(type, name) \
+    __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
+#endif

-#else /* CONFIG_SMP */
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
+#define per_cpu_ptr(ptr, cpu) ((__typeof__(ptr))(RELOC_HIDE(ptr, __per_cpu_offset[cpu])))

-#define per_cpu_ptr(ptr, cpu) (ptr)
+extern void setup_per_cpu_areas(void);
+#else /* !CONFIG_SMP */

-static inline void *kmalloc_percpu(size_t size, int flags)
+/* Can't define per-cpu variables in modules.  Sorry --RR */
+#ifndef MODULE
+#define DEFINE_PER_CPU(type, name) \
+    __typeof__(type) name##__per_cpu
+#endif
+
+#define per_cpu(var, cpu)                      ((void)(cpu), var##__per_cpu)
+#define __get_cpu_var(var)                     var##__per_cpu
+#define per_cpu_ptr(ptr, cpu)                  ((void)(cpu), (ptr))
+#define __get_cpu_ptr(ptr)                     (ptr)
+
+static inline void *__alloc_percpu(size_t size, size_t align)
 {
-       return(kmalloc(size, flags));
+       void *ret;
+       /* kmalloc always cacheline aligns. */
+       BUG_ON(align > SMP_CACHE_BYTES);
+       BUG_ON(size > PERCPU_POOL_SIZE/2);
+       ret = kmalloc(size, GFP_KERNEL);
+       if (ret)
+               memset(ret, 0, size);
+       return ret;
 }
 static inline void kfree_percpu(const void *ptr)
 {      
        kfree(ptr);
 }
-static inline void kmalloc_percpu_init(void) { }

+static inline void setup_per_cpu_areas(void) { }
 #endif /* CONFIG_SMP */

-/*
- * Use these with kmalloc_percpu. If
- * 1. You want to operate on memory allocated by kmalloc_percpu (dereference
- *    and read/modify/write)  AND
- * 2. You want "this cpu's version" of the object AND
- * 3. You want to do this safely since:
- *    a. On multiprocessors, you don't want to switch between cpus after
- *    you've read the current processor id due to preemption -- this would
- *    take away the implicit  advantage to not have any kind of traditional
- *    serialization for per-cpu data
- *    b. On uniprocessors, you don't want another kernel thread messing
- *    up with the same per-cpu data due to preemption
- *    
- * So, Use get_cpu_ptr to disable preemption and get pointer to the
- * local cpu version of the per-cpu object. Use put_cpu_ptr to enable
- * preemption.  Operations on per-cpu data between get_ and put_ is
- * then considered to be safe. And ofcourse, "Thou shalt not sleep between
- * get_cpu_ptr and put_cpu_ptr"
- */
-#define get_cpu_ptr(ptr) per_cpu_ptr(ptr, get_cpu())
-#define put_cpu_ptr(ptr) put_cpu()
+/* Simple wrapper for the common case.  Zeros memory. */
+#define kmalloc_percpu(type) \
+       ((type *)(__alloc_percpu(sizeof(type), __alignof__(type))))
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu)

 #endif /* __LINUX_PERCPU_H */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .29880-linux-2.5.69/include/net/ipv6.h .29880-linux-2.5.69.updated/include/net/ipv6.h
--- .29880-linux-2.5.69/include/net/ipv6.h      2003-05-05 12:37:12.000000000 +1000
+++ .29880-linux-2.5.69.updated/include/net/ipv6.h      2003-05-05 17:36:25.000000000 +1000
@@ -145,7 +145,7 @@ extern atomic_t                     inet6_sock_nr;

 int snmp6_register_dev(struct inet6_dev *idev);
 int snmp6_unregister_dev(struct inet6_dev *idev);
-int snmp6_mib_init(void *ptr[2], size_t mibsize);
+int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);

 struct ip6_ra_chain
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .29880-linux-2.5.69/init/main.c .29880-linux-2.5.69.updated/init/main.c
--- .29880-linux-2.5.69/init/main.c     2003-05-05 12:37:13.000000000 +1000
+++ .29880-linux-2.5.69.updated/init/main.c     2003-05-05 17:36:25.000000000 +1000
@@ -301,35 +301,10 @@ static void __init smp_init(void)
 #define smp_init()     do { } while (0)
 #endif

-static inline void setup_per_cpu_areas(void) { }
 static inline void smp_prepare_cpus(unsigned int maxcpus) { }

 #else

-#ifdef __GENERIC_PER_CPU
-unsigned long __per_cpu_offset[NR_CPUS];
-
-static void __init setup_per_cpu_areas(void)
-{
-       unsigned long size, i;
-       char *ptr;
-       /* Created by linker magic */
-       extern char __per_cpu_start[], __per_cpu_end[];
-
-       /* Copy section for each CPU (we discard the original) */
-       size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
-       if (!size)
-               return;
-
-       ptr = alloc_bootmem(size *
...

read more »

 
 
 

kmalloc_percpu

Post by Andrew Morto » Tue, 06 May 2003 10:50:09



> This is the kmalloc_percpu patch.

How does it work?  What restrictions does it have, and
what compromises were made?

+#define PERCPU_POOL_SIZE 32768

What's this?

The current implementation of kmalloc_per_cpu() turned out to be fairly
disappointing because of the number of derefs which were necessary to get at
the data in fastpaths.   How does this implementation compare?

Thanks.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Rusty Russel » Wed, 07 May 2003 03:40:06




> > This is the kmalloc_percpu patch.

> How does it work?  What restrictions does it have, and
> what compromises were made?

> +#define PERCPU_POOL_SIZE 32768

> What's this?

OK.  It has a size restriction: PERCPU_POOL_SIZE is the maximum total
kmalloc_percpu + static DECLARE_PER_CPU you'll get, ever.  This is the
main downside.  It's allocated at boot.

The __alloc_percpu allocator is extremely space efficient, by not
insisting on cache-line aligning everything: __alloc_percpu(SIZE)
overhead is sizeof(int), plus SIZE bytes (rounded up to alignment
requirements) removed from per-cpu pool.

The allocator is fairly slow: they're not expected to be thrown around
like candy.

Quote:> The current implementation of kmalloc_per_cpu() turned out to be fairly
> disappointing because of the number of derefs which were necessary to get at
> the data in fastpaths.   How does this implementation compare?

It uses the same method as the static ones, so it's a single addition
of __per_cpu_offset (assuming arch doesn't override implementation).
This is a requirement for modules to use them (which was my aim: the
other side effects are cream).

Hope that clarifies,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Andrew Morto » Wed, 07 May 2003 04:00:10



> > +#define PERCPU_POOL_SIZE 32768

> > What's this?

> OK.  It has a size restriction: PERCPU_POOL_SIZE is the maximum total
> kmalloc_percpu + static DECLARE_PER_CPU you'll get, ever.  This is the
> main downside.  It's allocated at boot.

And is subject to fragmentation.

Is it not possible to go allocate another N * PERCPU_POOL_SIZE from
slab if it runs out?

That way, PERCPU_POOL_SIZE only needs to be sized for non-modular static
percpu data, which sounds more robust.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by David S. Mille » Wed, 07 May 2003 05:10:04




> > OK.  It has a size restriction: PERCPU_POOL_SIZE is the maximum total
> > kmalloc_percpu + static DECLARE_PER_CPU you'll get, ever.  This is the
> > main downside.  It's allocated at boot.

> And is subject to fragmentation.

> Is it not possible to go allocate another N * PERCPU_POOL_SIZE from
> slab if it runs out?

No, then you go back to things requiring multiple levels of
dereferencing.  It's hard to realloc() because you have to
freeze the whole kernel to do that properly, and that is not
simple at all.

I think the fixed size pool is perfectly reasonable.

--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Rusty Russel » Wed, 07 May 2003 06:20:15





> > > OK.  It has a size restriction: PERCPU_POOL_SIZE is the maximum total
> > > kmalloc_percpu + static DECLARE_PER_CPU you'll get, ever.  This is the
> > > main downside.  It's allocated at boot.

> > And is subject to fragmentation.

> > Is it not possible to go allocate another N * PERCPU_POOL_SIZE from
> > slab if it runs out?

> No, then you go back to things requiring multiple levels of
> dereferencing.

Actually, you can; my previous patch did this.  But then all CPUs have
to be in one contiguous allocation, and since modern big-SMP machines
are non-uniform, you don't want this.

        http://www.kernel.org/pub/linux/kernel/people/rusty/patches/Misc/kmal...

Quote:> I think the fixed size pool is perfectly reasonable.

Yes.  It's a tradeoff.  I think it's worth it at the moment (although
I'll add a limited printk to __alloc_percpu if it fails).

Cheers,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Rusty Russel » Wed, 07 May 2003 06:20:22




> > > +#define PERCPU_POOL_SIZE 32768

> > > What's this?

> > OK.  It has a size restriction: PERCPU_POOL_SIZE is the maximum total
> > kmalloc_percpu + static DECLARE_PER_CPU you'll get, ever.  This is the
> > main downside.  It's allocated at boot.

> And is subject to fragmentation.

Absolutely.  However, we're looking at an allocation at module insert,
and maybe at each mount (if used for your fuzzy counters).  Until we
see this in practice, I don't think complicating the allocator is
worth it.

Cheers,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Andrew Morto » Wed, 07 May 2003 06:30:19



> > I think the fixed size pool is perfectly reasonable.

> Yes.  It's a tradeoff.  I think it's worth it at the moment (although
> I'll add a limited printk to __alloc_percpu if it fails).

It's OK as long as nobody uses the feature!  Once it starts to be commonly
used (say, in driver ->open() methods) then we'll get into the same problems
as with vmalloc exhaustion, vmalloc fragmentation, large physically-contig
allocations, etc.

Ho-hum.  Can the magical constant become a __setup thing?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by David S. Mille » Wed, 07 May 2003 06:50:08



   Date: Tue, 06 May 2003 14:08:27 +1000


   > I think the fixed size pool is perfectly reasonable.

   Yes.  It's a tradeoff.  I think it's worth it at the moment (although
   I'll add a limited printk to __alloc_percpu if it fails).

I think you should BUG() if a module calls kmalloc_percpu() outside
of mod->init(), this is actually implementable.

Andrew's example with some module doing kmalloc_percpu() inside
of fops->open() is just ridiculous.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by David S. Mille » Wed, 07 May 2003 06:50:08



   Date: Mon, 5 May 2003 21:28:16 -0700

   It's OK as long as nobody uses the feature!

I think this is closer to say, allocation of kmap types,
than it is to vmalloc() et al. (as you suggest).

   Ho-hum.  Can the magical constant become a __setup thing?

Remember that there are physical limitations, for example
on ia64, as to how big this thing can be.  So whatever any
of us think about physical limitations, we have to deal with
them anyways :-)

I think, firstly, that we should define that this isn't
something you should be doing after module_init()  (i.e. your
->open() example, that's ridiculous).  Ideas on how to
enforce this are welcome.

Next, we can calculate how much per-cpu space all the modules
need.  And because we can do that, we can preallocate slots
if we wanted to in order to deal with whatever theoretical
fragmentation problems you think exist (much like how Jakub Jelinek's
prelinking works).

I personally don't know how smart it is to let random modules use
kmalloc_percpu() with impunity.  But apparently someone thinks
there is some value in that.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Dipankar Sarm » Wed, 07 May 2003 07:10:06



> I think you should BUG() if a module calls kmalloc_percpu() outside
> of mod->init(), this is actually implementable.

> Andrew's example with some module doing kmalloc_percpu() inside
> of fops->open() is just ridiculous.

The disk stats are already per-cpu. So, what happens when you offline/online
a disk ? How do you allocate per-cpu memory during that ?

Thanks
Dipankar
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Ravikiran G Thirumala » Wed, 07 May 2003 07:10:07




> > This is the kmalloc_percpu patch.

> How does it work?  What restrictions does it have, and
> what compromises were made?

> +#define PERCPU_POOL_SIZE 32768

> What's this?

> The current implementation of kmalloc_per_cpu() turned out to be fairly
> disappointing because of the number of derefs which were necessary to get at
> the data in fastpaths.   How does this implementation compare?

Andrew,
Here is a comparison of kmalloc_percpu techniques as I see it:

Current Implementation:
1. Two dereferences to get to the per-cpu data
2. Allocates for cpu_possible cpus only, and can deal with sparse cpu nos

Rusty's Implementation
1. One extra memory reference (__per_cpu_offset)
2. allocates for NR_CPUS and probably breaks with sparse cpu nos?
3. Let you do per-cpu data in modules
4. fragmentation

The simpler patch I mailed you sometime back,
1. Minimal dereference overhead, offsets to per-cpu data calculated at
   compile time
2. allocates for NR_CPUS and problems with sparse cpu nos
3. Very Simple.

My guess is that, performance-wise, Rusty's implementation and the simpler
implementation of kmalloc_percpu will be comparable. (I'll run some
tests to compare them and post them later).  I am including the
simpler kmalloc_percpu patch which I'd mailed to you earlier.

Thanks,
Kiran

diff -ruN -X dontdiff linux-2.5.65/include/linux/percpu.h kmalloc-new-2.5.65/include/linux/percpu.h
--- linux-2.5.65/include/linux/percpu.h Tue Mar 18 03:14:43 2003

 #define put_cpu_var(var) preempt_enable()

 #ifdef CONFIG_SMP
-
-struct percpu_data {
-       void *ptrs[NR_CPUS];
-       void *blkp;
-};
-
 /*
  * Use this to get to a cpu's version of the per-cpu object allocated using
  * kmalloc_percpu.  If you want to get "this cpu's version", maybe you want
  * to use get_cpu_ptr...
  */
 #define per_cpu_ptr(ptr, cpu)                   \
-({                                              \
-        struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
-        (__typeof__(ptr))__p->ptrs[(cpu)];  \
-})
+        ((__typeof__(ptr))                     \
+               (RELOC_HIDE(ptr, ALIGN(sizeof (*ptr), SMP_CACHE_BYTES)*cpu)))

 extern void *kmalloc_percpu(size_t size, int flags);
 extern void kfree_percpu(const void *);
diff -ruN -X dontdiff linux-2.5.65/mm/slab.c kmalloc-new-2.5.65/mm/slab.c
--- linux-2.5.65/mm/slab.c      Tue Mar 18 03:14:38 2003

 void *
 kmalloc_percpu(size_t size, int flags)
 {
-       int i;
-       struct percpu_data *pdata = kmalloc(sizeof (*pdata), flags);
-
-       if (!pdata)
-               return NULL;
-
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_possible(i))
-                       continue;
-               pdata->ptrs[i] = kmalloc(size, flags);
-               if (!pdata->ptrs[i])
-                       goto unwind_oom;
-       }
-
-       /* Catch derefs w/o wrappers */
-       return (void *) (~(unsigned long) pdata);
-
-unwind_oom:
-       while (--i >= 0) {
-               if (!cpu_possible(i))
-                       continue;
-               kfree(pdata->ptrs[i]);
-       }
-       kfree(pdata);
-       return NULL;
+       return kmalloc(ALIGN(size, SMP_CACHE_BYTES)*NR_CPUS, flags);
 }
 #endif

 void
 kfree_percpu(const void *objp)
 {
-       int i;
-       struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
-
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_possible(i))
-                       continue;
-               kfree(p->ptrs[i]);
-       }
+       kfree(objp);
 }
 #endif

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Andrew Morto » Wed, 07 May 2003 07:10:07



Quote:

> I think you should BUG() if a module calls kmalloc_percpu() outside
> of mod->init(), this is actually implementable.

> Andrew's example with some module doing kmalloc_percpu() inside
> of fops->open() is just ridiculous.

crap.  Modules deal with per-device and per-mount objects.  If a module
cannot use kmalloc_per_cpu on behalf of the primary object which it manages
then the facility is simply not useful to modules.

The static DEFINE_PER_CPU allocation works OK in core kernel because core
kernel does _not_ use per-instance objects.  But modules do.

A case in point, which Rusty has twice mentioned, is the three per-mount
fuzzy counters in the ext2 superblock.  And lo, ext2 cannot use the code in
this patch, because people want to scale to 4000 mounts.

In those very rare cases where a module wants allocation to be performed at
module_init()-time (presumably global stats counters), they can use
DEFINE_PER_CPU, so we should just not export kmalloc_per_cpu() to modules at
all.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by David S. Mille » Wed, 07 May 2003 07:30:11



   Date: Mon, 5 May 2003 22:02:50 -0700


   > Andrew's example with some module doing kmalloc_percpu() inside
   > of fops->open() is just ridiculous.

   crap.  Modules deal with per-device and per-mount objects.  If a module
   cannot use kmalloc_per_cpu on behalf of the primary object which it manages
   then the facility is simply not useful to modules.

Ok then.

Please address the ia64 concerns then :-)  It probably means we
have to stay with the dereferencing stuff...  at which point you
might as well use normal kmalloc() and smp_processor_id() indexing
inside of modules.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

kmalloc_percpu

Post by Andrew Morto » Wed, 07 May 2003 07:50:08



Quote:

> Please address the ia64 concerns then :-)  It probably means we
> have to stay with the dereferencing stuff...  at which point you
> might as well use normal kmalloc() and smp_processor_id() indexing
> inside of modules.

I think so.  So we'd end up with:

- DEFINE_PER_CPU and kmalloc_percpu() work in core kernel, and use the 32k
  pool.

- DEFINE_PER_CPU in modules uses the 32k pool as well (core kernel does the
  allocation).

- kmalloc_per_cpu() is unavailable to modules (it ain't exported).

AFAICT the only thing which will break is sctp, which needs a trivial
conversion to DEFINE_PER_CPU.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/