i386 Topology update

i386 Topology update

Post by Matthew Dobson » Thu, 31 Oct 2002 21:40:05



Linus,
        This patch updates the i386 topology macros to be much more efficient.
The existing functions are relatively slow searches; the new versions are
simple array lookups.  The new code is #ifdef'd out for non-NUMAQ builds.

i386_topo_fixup.patch

Updates include/asm-i386/topology.h to read values from arrays that are
populated at boot time with cpu<->node mapping information, rather than
recalculating this information every time one of the macros is used.

Please apply...

Cheers!

-Matt

[ i386_topology_fixup-2.5.44.patch 5K ]
diff -Nur --exclude-from=/usr/src/.dontdiff linux-2.5.44-vanilla/arch/i386/kernel/smpboot.c linux-2.5.44-i386_topo_fixup/arch/i386/kernel/smpboot.c
--- linux-2.5.44-vanilla/arch/i386/kernel/smpboot.c     Fri Oct 18 21:01:53 2002

        return do_fork(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL);
 }

+#ifdef CONFIG_X86_NUMAQ
+/*
+ * which logical CPUs are on which nodes: indexed by node number, bit 'cpu'
+ * of an entry is set while that logical CPU is mapped to the node
+ */
+volatile unsigned long node_2_cpu_mask[MAX_NR_NODES];
+/* which node each logical CPU is on, indexed by CPU number; -1 if unmapped */
+volatile int cpu_2_node[NR_CPUS];
+
+/*
+ * Reset both cpu<->node tables to their "nothing mapped" state:
+ * every node gets an empty CPU mask and every CPU gets node -1.
+ */
+static inline void init_cpu_to_node_mapping(void)
+{
+       int i;
+
+       /* no CPUs on any node yet */
+       for (i = 0; i < MAX_NR_NODES; i++)
+               node_2_cpu_mask[i] = 0;
+
+       /* no CPU belongs to a node yet */
+       for (i = 0; i < NR_CPUS; i++)
+               cpu_2_node[i] = -1;
+}
+
+/*
+ * Set up a mapping between cpu and node: set the CPU's bit in the node's
+ * mask and record the node in cpu_2_node[].
+ * The mask is an unsigned long, so the bit is built from 1UL; an int
+ * '1 << cpu' overflows for cpu 31 and cannot reach bits beyond an int.
+ */
+static inline void map_cpu_to_node(int cpu, int node)
+{
+       node_2_cpu_mask[node] |= (1UL << cpu);
+       cpu_2_node[cpu] = node;
+}
+
+/*
+ * Undo a mapping between cpu and node: clear the CPU's bit in the node's
+ * mask and mark the CPU as belonging to no node (-1).
+ * The bit is built from 1UL to match the unsigned long mask; an int
+ * '1 << cpu' overflows for cpu 31.
+ */
+static inline void unmap_cpu_to_node(int cpu, int node)
+{
+       node_2_cpu_mask[node] &= ~(1UL << cpu);
+       cpu_2_node[cpu] = -1;
+}
+#else /* !CONFIG_X86_NUMAQ */
+
+/* Without NUMAQ there are no cpu<->node tables; these expand to no-ops. */
+#define init_cpu_to_node_mapping()     do { } while (0)
+#define map_cpu_to_node(cpu, node)     do { } while (0)
+#define unmap_cpu_to_node(cpu, node)   do { } while (0)
+
+#endif /* CONFIG_X86_NUMAQ */
+
 /* which physical APIC ID maps to which logical CPU number */
 volatile int physical_apicid_2_cpu[MAX_APICID];

                cpu_2_physical_apicid[cpu] = -1;
                cpu_2_logical_apicid[cpu] = -1;
        }
+       init_cpu_to_node_mapping();
 }


        if (clustered_apic_mode) {
                logical_apicid_2_cpu[apicid] = cpu;    
                cpu_2_logical_apicid[cpu] = apicid;
+               map_cpu_to_node(cpu, apicid >> 4);
        } else {
                physical_apicid_2_cpu[apicid] = cpu;    

        if (clustered_apic_mode) {
                logical_apicid_2_cpu[apicid] = -1;      
                cpu_2_logical_apicid[cpu] = -1;
+               unmap_cpu_to_node(cpu, apicid >> 4);
        } else {
                physical_apicid_2_cpu[apicid] = -1;    
                cpu_2_physical_apicid[cpu] = -1;
diff -Nur --exclude-from=/usr/src/.dontdiff linux-2.5.44-vanilla/include/asm-i386/smpboot.h linux-2.5.44-i386_topo_fixup/include/asm-i386/smpboot.h
--- linux-2.5.44-vanilla/include/asm-i386/smpboot.h     Fri Oct 18 21:02:31 2002

  #define boot_cpu_apicid boot_cpu_physical_apicid
 #endif /* CONFIG_CLUSTERED_APIC */

+#ifdef CONFIG_X86_NUMAQ
+/*
+ * Mappings between logical cpu number and node number.
+ * node_2_cpu_mask[] is indexed by node and holds a bitmask of logical cpus;
+ * cpu_2_node[] is indexed by logical cpu and holds its node number.
+ * Both arrays are defined in arch/i386/kernel/smpboot.c.
+ */
+extern volatile unsigned long node_2_cpu_mask[];
+extern volatile int cpu_2_node[];
+#endif /* CONFIG_X86_NUMAQ */
+
 /*
  * Mappings between logical cpu number and logical / physical apicid
  * The first four macros are trivial, but it keeps the abstraction consistent
diff -Nur --exclude-from=/usr/src/.dontdiff linux-2.5.44-vanilla/include/asm-i386/topology.h linux-2.5.44-i386_topo_fixup/include/asm-i386/topology.h
--- linux-2.5.44-vanilla/include/asm-i386/topology.h    Fri Oct 18 21:01:16 2002

 #include <asm/smpboot.h>

 /* Returns the number of the node containing CPU 'cpu' */
-#define __cpu_to_node(cpu) (cpu_to_logical_apicid(cpu) >> 4)
+#define __cpu_to_node(cpu) (cpu_2_node[cpu]) /* -1 while 'cpu' is unmapped */

 /* Returns the number of the node containing MemBlk 'memblk' */

    so it is a pretty simple function! */
 #define __parent_node(node) (node)

-/* Returns the number of the first CPU on Node 'node'.
- * This should be changed to a set of cached values
- * but this will do for now.
- */
-static inline int __node_to_first_cpu(int node)
-{
-       int i, cpu, logical_apicid = node << 4;
-
-       for(i = 1; i < 16; i <<= 1)
-               /* check to see if the cpu is in the system */
-               if ((cpu = logical_apicid_to_cpu(logical_apicid | i)) >= 0)
-                       /* if yes, return it to caller */
-                       return cpu;
-
-       BUG(); /* couldn't find a cpu on given node */
-       return -1;
-}
-
-/* Returns a bitmask of CPUs on Node 'node'.
- * This should be changed to a set of cached bitmasks
- * but this will do for now.
- */
-static inline unsigned long __node_to_cpu_mask(int node)
-{
-       int i, cpu, logical_apicid = node << 4;
-       unsigned long mask = 0UL;
-
-       if (sizeof(unsigned long) * 8 < NR_CPUS)
-               BUG();
-
-       for(i = 1; i < 16; i <<= 1)
-               /* check to see if the cpu is in the system */
-               if ((cpu = logical_apicid_to_cpu(logical_apicid | i)) >= 0)
-                       /* if yes, add to bitmask */
-                       mask |= 1 << cpu;
+/*
+ * Returns a bitmask of CPUs on Node 'node', read directly from the
+ * node_2_cpu_mask[] table (bit N set means logical CPU N is on the node).
+ */
+#define __node_to_cpu_mask(node) (node_2_cpu_mask[node])

-       return mask;
-}
+/*
+ * Returns the number of the first CPU on Node 'node'.
+ * __ffs() gives an undefined result for an empty mask, so a node with no
+ * CPUs is reported with BUG() instead of returning a garbage CPU number.
+ * 'node' is evaluated exactly once.
+ */
+#define __node_to_first_cpu(node) ({                                   \
+       unsigned long __node_mask = __node_to_cpu_mask(node);           \
+       if (!__node_mask)                                               \
+               BUG(); /* couldn't find a cpu on given node */          \
+       __ffs(__node_mask);                                             \
+})

 /* Returns the number of the first MemBlk on Node 'node' */
 #define __node_to_memblk(node) (node)