Index: sys/arch/amd64/amd64/mainbus.c =================================================================== RCS file: /cvsroot/src/sys/arch/amd64/amd64/mainbus.c,v retrieving revision 1.26 diff -u -p -r1.26 mainbus.c --- sys/arch/amd64/amd64/mainbus.c 10 Nov 2008 14:36:59 -0000 1.26 +++ sys/arch/amd64/amd64/mainbus.c 10 Nov 2008 14:39:06 -0000 @@ -54,11 +54,13 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v #include "opt_mpbios.h" #include "opt_pcifixup.h" -#include +#include #include #include #include +#include + #if NACPI > 0 #include #endif @@ -92,7 +94,6 @@ union mainbus_attach_args { const char *mba_busname; /* first elem of all */ struct pcibus_attach_args mba_pba; struct isabus_attach_args mba_iba; - struct cpu_attach_args mba_caa; #if NACPI > 0 struct acpibus_attach_args mba_acpi; #endif @@ -133,6 +134,8 @@ int mp_verbose = 0; #endif #endif +int acpi_present = 0; +uint32_t numa_bootflags = 0; /* * Probe for the mainbus; always succeeds. @@ -150,17 +153,15 @@ mainbus_match(device_t parent, cfdata_t void mainbus_attach(device_t parent, device_t self, void *aux) { + struct numa_info *node = NULL; #if NPCI > 0 union mainbus_attach_args mba; #endif -#if NACPI > 0 - int acpi_present = 0; -#endif #ifdef MPBIOS int mpbios_present = 0; #endif + bool numa_faketopology = false; int mpacpi_active = 0; - int numcpus = 0; #if defined(PCI_BUS_FIXUP) int pci_maxbus = 0; #endif @@ -194,33 +195,44 @@ mainbus_attach(device_t parent, device_t #if NACPI > 0 if ((boothowto & RB_MD2) == 0 && acpi_check(self, "acpibus")) acpi_present = acpi_probe(); +#endif + + numa_bootflags = mi_numa_init(); + + if (numa_bootflags & NUMAF_FAKETOPOLOGY) + numa_faketopology = true; + + if (numa_faketopology) { + aprint_normal_dev(self, "fake a one node NUMA system\n"); + + /* Fake a one node NUMA system */ + node = numanode_getbyid_alloc(0, true); + if (!node) + panic("NUMA: can't allocate memory for NUMA node\n"); + } + +#if NACPI > 0 /* * First, see if the MADT contains CPUs, and possibly I/O APICs. 
* Building the interrupt routing structures can only * be done later (via a callback). */ if (acpi_present) - mpacpi_active = mpacpi_scan_apics(self, &numcpus); + mpacpi_active = mpacpi_scan_apics(self); #endif if (!mpacpi_active) { #ifdef MPBIOS if (mpbios_present) - mpbios_scan(self, &numcpus); - else + mpbios_scan(self); #endif - if (numcpus == 0) { - struct cpu_attach_args caa; - - memset(&caa, 0, sizeof(caa)); - caa.cpu_number = 0; - caa.cpu_role = CPU_ROLE_SP; - caa.cpu_func = 0; - - config_found_ia(self, "cpubus", &caa, mainbus_print); - } } + if (numa_faketopology) + numa_fakememory(node); + + md_numa_init(self); + #if NISADMA > 0 && NACPI > 0 /* * ACPI needs ISA DMA initialized before they start probing. Index: sys/arch/amd64/conf/XEN3_DOM0 =================================================================== RCS file: /cvsroot/src/sys/arch/amd64/conf/XEN3_DOM0,v retrieving revision 1.33 diff -u -p -r1.33 XEN3_DOM0 --- sys/arch/amd64/conf/XEN3_DOM0 21 Oct 2008 10:13:23 -0000 1.33 +++ sys/arch/amd64/conf/XEN3_DOM0 10 Nov 2008 14:39:06 -0000 @@ -178,7 +178,8 @@ config netbsd root on ? type ? mainbus0 at root -cpu* at mainbus? +numa* at mainbus? +cpu* at numa? # IPMI support ipmi0 at mainbus? Index: sys/arch/amd64/conf/files.amd64 =================================================================== RCS file: /cvsroot/src/sys/arch/amd64/conf/files.amd64,v retrieving revision 1.60 diff -u -p -r1.60 files.amd64 --- sys/arch/amd64/conf/files.amd64 11 May 2008 14:44:53 -0000 1.60 +++ sys/arch/amd64/conf/files.amd64 10 Nov 2008 14:39:06 -0000 @@ -87,7 +87,7 @@ file arch/amd64/amd64/bios32.c bios32 n # # XXX BIOS32 only if something that uses it is configured! 
-device mainbus: isabus, pcibus, bios32, acpibus, cpubus, ioapicbus, ipmibus +device mainbus: isabus, pcibus, bios32, acpibus, ioapicbus, ipmibus, numabus attach mainbus at root file arch/amd64/amd64/mainbus.c mainbus Index: sys/arch/amd64/conf/std.amd64 =================================================================== RCS file: /cvsroot/src/sys/arch/amd64/conf/std.amd64,v retrieving revision 1.6 diff -u -p -r1.6 std.amd64 --- sys/arch/amd64/conf/std.amd64 30 Apr 2008 22:08:18 -0000 1.6 +++ sys/arch/amd64/conf/std.amd64 10 Nov 2008 14:39:06 -0000 @@ -12,6 +12,6 @@ options MTRR options MULTIPROCESSOR mainbus0 at root -cpu* at mainbus? +numa* at mainbus? +cpu* at numa? ioapic* at mainbus? apid ? - Index: sys/arch/amd64/include/numa.h =================================================================== RCS file: sys/arch/amd64/include/numa.h diff -N sys/arch/amd64/include/numa.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/arch/amd64/include/numa.h 10 Nov 2008 14:39:06 -0000 @@ -0,0 +1,3 @@ +/* $NetBSD: $ */ + +#include Index: sys/arch/i386/conf/XEN2_DOM0 =================================================================== RCS file: /cvsroot/src/sys/arch/i386/conf/XEN2_DOM0,v retrieving revision 1.53 diff -u -p -r1.53 XEN2_DOM0 --- sys/arch/i386/conf/XEN2_DOM0 29 Oct 2008 11:56:53 -0000 1.53 +++ sys/arch/i386/conf/XEN2_DOM0 10 Nov 2008 14:39:21 -0000 @@ -187,7 +187,8 @@ config netbsd root on ? type ? mainbus0 at root -cpu* at mainbus? +numa* at mainbus? +cpu* at numa? # IPMI support ipmi0 at mainbus? Index: sys/arch/i386/conf/XEN2_DOMU =================================================================== RCS file: /cvsroot/src/sys/arch/i386/conf/XEN2_DOMU,v retrieving revision 1.19 diff -u -p -r1.19 XEN2_DOMU --- sys/arch/i386/conf/XEN2_DOMU 4 Aug 2008 03:55:47 -0000 1.19 +++ sys/arch/i386/conf/XEN2_DOMU 10 Nov 2008 14:39:21 -0000 @@ -166,7 +166,8 @@ config netbsd root on ? type ? mainbus0 at root -cpu* at mainbus? +numa* at mainbus? +cpu* at numa? 
hypervisor* at mainbus? # Xen hypervisor Index: sys/arch/i386/conf/XEN3_DOMU =================================================================== RCS file: /cvsroot/src/sys/arch/i386/conf/XEN3_DOMU,v retrieving revision 1.10 diff -u -p -r1.10 XEN3_DOMU --- sys/arch/i386/conf/XEN3_DOMU 11 Nov 2006 20:00:39 -0000 1.10 +++ sys/arch/i386/conf/XEN3_DOMU 10 Nov 2008 14:39:21 -0000 @@ -6,7 +6,8 @@ options XEN3 #Xen 3.x support options XEN_COMPAT_030001 #compatible with Xen3 before 3.0.2 options MAXPHYS=32768 #xbd doesn't handle 64k transfers -no cpu* at mainbus? +no numa* at mainbus? +no cpu* at numa? no xennet* at hypervisor? no xbd* at hypervisor? Index: sys/arch/i386/conf/files.i386 =================================================================== RCS file: /cvsroot/src/sys/arch/i386/conf/files.i386,v retrieving revision 1.338 diff -u -p -r1.338 files.i386 --- sys/arch/i386/conf/files.i386 13 Jun 2008 17:26:33 -0000 1.338 +++ sys/arch/i386/conf/files.i386 10 Nov 2008 14:39:21 -0000 @@ -143,7 +143,7 @@ define vesabiosbus {} # XXX BIOS32 only if something that uses it is configured! device mainbus: isabus, eisabus, mcabus, pcibus, bios32, acpibus, - cpubus, ioapicbus, apmbus, pnpbiosbus, vesabiosbus, ipmibus, + ioapicbus, apmbus, pnpbiosbus, vesabiosbus, ipmibus, numabus, bioscall attach mainbus at root file arch/i386/i386/mainbus.c mainbus Index: sys/arch/i386/conf/std.i386 =================================================================== RCS file: /cvsroot/src/sys/arch/i386/conf/std.i386,v retrieving revision 1.28 diff -u -p -r1.28 std.i386 --- sys/arch/i386/conf/std.i386 30 Apr 2008 15:29:12 -0000 1.28 +++ sys/arch/i386/conf/std.i386 10 Nov 2008 14:39:21 -0000 @@ -17,5 +17,6 @@ options MULTIPROCESSOR # multiprocesso options MPBIOS # configure CPUs and APICs using MPBIOS mainbus0 at root -cpu* at mainbus? +numa* at mainbus? +cpu* at numa? ioapic* at mainbus? 
Index: sys/arch/i386/i386/mainbus.c =================================================================== RCS file: /cvsroot/src/sys/arch/i386/i386/mainbus.c,v retrieving revision 1.79 diff -u -p -r1.79 mainbus.c --- sys/arch/i386/i386/mainbus.c 10 Nov 2008 14:36:59 -0000 1.79 +++ sys/arch/i386/i386/mainbus.c 10 Nov 2008 14:39:21 -0000 @@ -60,7 +60,7 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v #include "opt_mpbios.h" #include "opt_pcifixup.h" -#include +#include #include #include #include @@ -119,7 +119,6 @@ union mainbus_attach_args { #if NPNPBIOS > 0 struct pnpbios_attach_args mba_paa; #endif - struct cpu_attach_args mba_caa; struct apic_attach_args aaa_caa; #if NACPI > 0 struct acpibus_attach_args mba_acpi; @@ -181,24 +180,25 @@ mainbus_match(struct device *parent, str return 1; } +int acpi_present = 0; +uint32_t numa_bootflags = 0; + /* * Attach the mainbus. */ void mainbus_attach(struct device *parent, struct device *self, void *aux) { + struct numa_info *node = NULL; union mainbus_attach_args mba; -#if NACPI > 0 - int acpi_present = 0; -#endif #ifdef MPBIOS int mpbios_present = 0; #endif #if defined(PCI_BUS_FIXUP) int pci_maxbus = 0; #endif + bool numa_faketopology = false; int mpacpi_active = 0; - int numcpus = 0; aprint_naive("\n"); aprint_normal("\n"); @@ -229,32 +229,43 @@ mainbus_attach(struct device *parent, st #if NACPI > 0 if ((boothowto & RB_MD2) == 0 && acpi_check(self, "acpibus")) acpi_present = acpi_probe(); +#endif + + numa_bootflags = mi_numa_init(); + + if (numa_bootflags & NUMAF_FAKETOPOLOGY) + numa_faketopology = true; + + if (numa_faketopology) { + aprint_normal_dev(self, "fake a one node NUMA system\n"); + + /* Fake a one node NUMA system */ + node = numanode_getbyid_alloc(0, true); + if (!node) + panic("NUMA: can't allocate memory for NUMA node\n"); + } + +#if NACPI > 0 /* * First, see if the MADT contains CPUs, and possibly I/O APICs. * Building the interrupt routing structures can only * be done later (via a callback). 
*/ if (acpi_present) - mpacpi_active = mpacpi_scan_apics(self, &numcpus); + mpacpi_active = mpacpi_scan_apics(self); #endif if (!mpacpi_active) { #ifdef MPBIOS if (mpbios_present) - mpbios_scan(self, &numcpus); - else + mpbios_scan(self); #endif - if (numcpus == 0) { - struct cpu_attach_args caa; + } - memset(&caa, 0, sizeof(caa)); - caa.cpu_number = 0; - caa.cpu_role = CPU_ROLE_SP; - caa.cpu_func = 0; + if (numa_faketopology) + numa_fakememory(node); - config_found_ia(self, "cpubus", &caa, mainbus_print); - } - } + md_numa_init(self); #if NVESABIOS > 0 if (vbeprobe()) Index: sys/arch/i386/include/numa.h =================================================================== RCS file: sys/arch/i386/include/numa.h diff -N sys/arch/i386/include/numa.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/arch/i386/include/numa.h 10 Nov 2008 14:39:21 -0000 @@ -0,0 +1,3 @@ +/* $NetBSD: $ */ + +#include Index: sys/arch/x86/conf/files.x86 =================================================================== RCS file: /cvsroot/src/sys/arch/x86/conf/files.x86,v retrieving revision 1.44 diff -u -p -r1.44 files.x86 --- sys/arch/x86/conf/files.x86 3 Aug 2008 19:32:03 -0000 1.44 +++ sys/arch/x86/conf/files.x86 10 Nov 2008 14:39:33 -0000 @@ -35,6 +35,15 @@ define ioapicbus { [apid = -1] } define ipmibus {} # +# NUMA +# +device numabus { } +device numa: cpubus +attach numa at numabus +file arch/x86/x86/x86_numa.c numa +file arch/x86/x86/x86_numa_dev.c numa + +# # CPUs # device cpu Index: sys/arch/x86/include/cpu.h =================================================================== RCS file: /cvsroot/src/sys/arch/x86/include/cpu.h,v retrieving revision 1.9 diff -u -p -r1.9 cpu.h --- sys/arch/x86/include/cpu.h 25 Oct 2008 19:13:40 -0000 1.9 +++ sys/arch/x86/include/cpu.h 10 Nov 2008 14:39:33 -0000 @@ -57,6 +57,7 @@ #include #include +#include #include #include @@ -80,6 +81,7 @@ struct device; struct cpu_info { struct device *ci_dev; /* pointer to our device */ struct cpu_info *ci_self; /* 
self-pointer */ + struct numa_cpu_info *ci_nci; /* back-pointer to numa data */ volatile struct vcpu_info *ci_vcpu; /* for XEN */ void *ci_tlog_base; /* Trap log base */ int32_t ci_tlog_offset; /* Trap log current offset */ Index: sys/arch/x86/include/cpuvar.h =================================================================== RCS file: /cvsroot/src/sys/arch/x86/include/cpuvar.h,v retrieving revision 1.27 diff -u -p -r1.27 cpuvar.h --- sys/arch/x86/include/cpuvar.h 13 May 2008 22:39:17 -0000 1.27 +++ sys/arch/x86/include/cpuvar.h 10 Nov 2008 14:39:33 -0000 @@ -78,10 +78,14 @@ extern const struct cpu_functions mp_cpu #define CPU_ROLE_BP 1 #define CPU_ROLE_AP 2 +struct numa_cpu_info; + struct cpu_attach_args { + const char *cpu_busname; int cpu_number; int cpu_role; const struct cpu_functions *cpu_func; + struct numa_cpu_info *cpu_nci; }; #ifdef _KERNEL Index: sys/arch/x86/include/mpacpi.h =================================================================== RCS file: /cvsroot/src/sys/arch/x86/include/mpacpi.h,v retrieving revision 1.8 diff -u -p -r1.8 mpacpi.h --- sys/arch/x86/include/mpacpi.h 9 Nov 2008 15:34:14 -0000 1.8 +++ sys/arch/x86/include/mpacpi.h 10 Nov 2008 14:39:33 -0000 @@ -5,7 +5,7 @@ struct pcibus_attach_args; -int mpacpi_scan_apics(device_t, int *); +int mpacpi_scan_apics(device_t); int mpacpi_find_interrupts(void *); int mpacpi_pci_attach_hook(device_t, device_t, struct pcibus_attach_args *); Index: sys/arch/x86/include/mpbiosvar.h =================================================================== RCS file: /cvsroot/src/sys/arch/x86/include/mpbiosvar.h,v retrieving revision 1.7 diff -u -p -r1.7 mpbiosvar.h --- sys/arch/x86/include/mpbiosvar.h 9 Nov 2008 15:34:14 -0000 1.7 +++ sys/arch/x86/include/mpbiosvar.h 10 Nov 2008 14:39:33 -0000 @@ -45,7 +45,7 @@ struct pcibus_attach_args; #if defined(_KERNEL) -void mpbios_scan(device_t, int *); +void mpbios_scan(device_t); int mpbios_probe(device_t); int mpbios_pci_attach_hook(device_t, device_t, struct 
pcibus_attach_args *); Index: sys/arch/x86/include/numa.h =================================================================== RCS file: sys/arch/x86/include/numa.h diff -N sys/arch/x86/include/numa.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/arch/x86/include/numa.h 10 Nov 2008 14:39:33 -0000 @@ -0,0 +1,136 @@ +/* $NetBSD: $ */ +/* + * Copyright (c) 2008 Christoph Egger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef _X86_NUMA_H +#define _X86_NUMA_H + +#include + +#define __HAVE_NUMAINFO 1 + +struct device; +struct numa_cpu_info; +struct numa_mem_info; + +/* per numa-node info */ +struct numa_info { + struct device *ni_dev; /* pointer to our device */ + struct numa_info *ni_self; /* self-pointer */ + uint32_t ni_nodeid; + + /* Will be accessed by other NUMA nodes */ + struct numa_data ni_data; /* MI numa node data */ + struct numa_info *ni_next; /* next numa node */ + + uint32_t ni_ncpus; /* number of cpus in this node, + * 0 means cpu-less node */ + struct numa_cpu_info *ni_cpuinfo; /* list of cpus */ + + uint32_t ni_nmemory; /* number of memory entities */ + struct numa_mem_info *ni_meminfo; /* list of memory affinity */ + + /* + * Private members. + */ +}; + +/* per-cpu numa info */ +struct numa_cpu_info { + struct numa_info *nci_numainfo; /* back-pointer to the node */ + struct cpu_info *nci_cpuinfo; /* pointer to cpu_info */ + + struct numa_cpu_data nci_data; /* MI per-cpu numa node data */ + struct numa_cpu_info *nci_next; /* next cpu in this node */ + + uint32_t nci_flags; + + uint8_t nci_acpiid; /* ACPI processor id */ + uint8_t nci_apicid; /* Processor's local apic id */ + uint8_t nci_sapicid; /* SAPIC id */ + uint8_t nci_sapiceid; /* SAPIC eid */ +}; + +/* per-memory numa info */ +struct numa_mem_info { + struct numa_info *nmi_numainfo; /* back-pointer to the node */ + struct numa_mem_info *nmi_next; /* next memory in this node */ + + uint64_t nmi_baseaddress; + uint64_t nmi_length; + uint32_t nmi_mi_type; /* memory types, MI code understands */ + uint32_t nmi_md_type; /* memory types, MD code only understands */ + uint32_t nmi_mi_flags; /* Flags, MI code understands */ + uint32_t nmi_md_flags; /* Flags, MD code only understands */ +}; + +/* MD NUMA memory types */ +#define NUMAMEM_TYPE_NORMAL 0x00000000 +#define NUMAMEM_TYPE_RESERVED 0x00000001 +#define NUMAMEM_TYPE_ACPI 0x00000002 +#define NUMAMEM_TYPE_NVS 0x00000004 + +/* MD NUMA memory flags */ 
+ + +extern uint32_t cpu_bsp_number; +extern uint32_t numa_bootflags; +extern struct numa_info *numa_info_primary; +extern struct numa_info *numa_info_list; + +#define NUMA_INFO_ITERATOR int +#define NUMA_INFO_FOREACH(nii, ni) nii = 0, ni = numa_info_list; \ + ni != NULL; ni = (ni)->ni_next + + +#define NUMACPU_ITERATOR int +#define NUMACPU_FOREACH(node, ncii, nci) \ + ncii = 0, nci = (node)->ni_cpuinfo; nci != NULL; nci = (nci)->nci_next + +#define NUMAMEM_ITERATOR int +#define NUMAMEM_FOREACH(node, nmii, nmi) \ + nmii = 0, nmi = (node)->ni_meminfo; nmi != NULL; nmi = (nmi)->nmi_next + +#define NUMAINFO_IS_PRIMARY(ni) ((ni) == numa_info_primary) +#define NUMACPU_IS_PRIMARY(ni) ((ni)->nci_flags & NUMACPU_FLAG_PRIMARY) + +#define curnode() (curcpu()->ci_nci->nci_numainfo) + +uint32_t numa_get_topology(void); +uint32_t numa_get_affinity(void); + +int numa_fakememory(struct numa_info *); + +#define numanode_getbyid(id) numanode_getbyid_alloc((id), false) +struct numa_info *numanode_getbyid_alloc(uint32_t, bool); + +struct numa_cpu_info *numa_cpuinfo_alloc(struct numa_info *); +struct numa_mem_info *numa_meminfo_alloc(struct numa_info *); + +struct numa_cpu_info *numa_cpuinfo_getby_apicid(struct numa_info *, uint8_t); +struct numa_cpu_info *numa_cpuinfo_getby_sapiceid(struct numa_info *, uint8_t); + +#endif Index: sys/arch/x86/include/numavar.h =================================================================== RCS file: sys/arch/x86/include/numavar.h diff -N sys/arch/x86/include/numavar.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/arch/x86/include/numavar.h 10 Nov 2008 14:39:33 -0000 @@ -0,0 +1,36 @@ +/* $NetBSD: $ */ +/* + * Copyright (c) 2008 Christoph Egger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + + +#include +__KERNEL_RCSID(0, "$NetBSD: $"); + +struct numa_attach_args { + const char *naa_busname; /* first elem of all */ +}; + +int md_numa_init(device_t); Index: sys/arch/x86/x86/cpu.c =================================================================== RCS file: /cvsroot/src/sys/arch/x86/x86/cpu.c,v retrieving revision 1.59 diff -u -p -r1.59 cpu.c --- sys/arch/x86/x86/cpu.c 6 Nov 2008 19:29:46 -0000 1.59 +++ sys/arch/x86/x86/cpu.c 10 Nov 2008 14:39:34 -0000 @@ -324,6 +324,10 @@ cpu_attach(device_t parent, device_t sel ci->ci_dev = self; ci->ci_cpuid = caa->cpu_number; ci->ci_func = caa->cpu_func; + ci->ci_nci = caa->cpu_nci; + + KASSERT(ci->ci_nci != NULL); + KASSERT(ci->ci_nci->nci_apicid == lapic_cpu_number()); /* Must be before mi_cpu_attach(). 
*/ cpu_vm_init(ci); Index: sys/arch/x86/x86/mpacpi.c =================================================================== RCS file: /cvsroot/src/sys/arch/x86/x86/mpacpi.c,v retrieving revision 1.71 diff -u -p -r1.71 mpacpi.c --- sys/arch/x86/x86/mpacpi.c 9 Nov 2008 15:34:14 -0000 1.71 +++ sys/arch/x86/x86/mpacpi.c 10 Nov 2008 14:39:34 -0000 @@ -98,7 +98,9 @@ static TAILQ_HEAD(, mpacpi_pcibus) mpacp #endif +#if 0 static int mpacpi_cpuprint(void *, const char *); +#endif static int mpacpi_ioapicprint(void *, const char *); /* acpi_madt_walk callbacks */ @@ -145,17 +147,6 @@ int mpacpi_step; int mpacpi_force; static int -mpacpi_cpuprint(void *aux, const char *pnp) -{ - struct cpu_attach_args *caa = aux; - - if (pnp) - aprint_normal("cpu at %s", pnp); - aprint_normal(" apid %d", caa->cpu_number); - return (UNCONF); -} - -static int mpacpi_ioapicprint(void *aux, const char *pnp) { struct apic_attach_args *aaa = aux; @@ -343,31 +334,53 @@ mpacpi_count(ACPI_SUBTABLE_HEADER *hdrp, static ACPI_STATUS mpacpi_config_cpu(ACPI_SUBTABLE_HEADER *hdrp, void *aux) { +#if 0 device_t parent = aux; - ACPI_MADT_LOCAL_APIC *p; - struct cpu_attach_args caa; - int cpunum = 0; - int locs[CPUBUSCF_NLOCS]; - -#if defined(MULTIPROCESSOR) || defined(IOAPIC) - if (mpacpi_ncpu > 1) - cpunum = lapic_cpu_number(); #endif + ACPI_MADT_LOCAL_APIC *apic; + ACPI_MADT_LOCAL_SAPIC *sapic; + struct numa_cpu_info *nci; + bool fake = false; - if (hdrp->Type == ACPI_MADT_TYPE_LOCAL_APIC) { - p = (ACPI_MADT_LOCAL_APIC *)hdrp; - if (p->LapicFlags & ACPI_MADT_ENABLED) { - if (p->Id != cpunum) - caa.cpu_role = CPU_ROLE_AP; - else - caa.cpu_role = CPU_ROLE_BP; - caa.cpu_number = p->Id; - caa.cpu_func = &mp_cpu_funcs; - locs[CPUBUSCF_APID] = caa.cpu_number; - config_found_sm_loc(parent, "cpubus", locs, - &caa, mpacpi_cpuprint, config_stdsubmatch); - } + if (numa_bootflags & NUMAF_FAKETOPOLOGY) + fake = true; + + switch (hdrp->Type) { + case ACPI_MADT_TYPE_LOCAL_APIC: + apic = (ACPI_MADT_LOCAL_APIC *)hdrp; + if 
(!(apic->LapicFlags & ACPI_MADT_ENABLED)) + break; + + if (fake) + nci = numa_cpuinfo_getby_apicid(NULL, 0xff); + else + nci = numa_cpuinfo_getby_apicid(NULL, apic->Id); + KASSERT(nci != NULL); + if (nci->nci_apicid == 0xff) + nci->nci_apicid = apic->Id; + if (nci->nci_acpiid == 0xff) + nci->nci_acpiid = apic->ProcessorId; + break; + + case ACPI_MADT_TYPE_LOCAL_SAPIC: + sapic = (ACPI_MADT_LOCAL_SAPIC *)hdrp; + if (!(sapic->LapicFlags & ACPI_MADT_ENABLED)) + break; + + if (fake) + nci = numa_cpuinfo_getby_sapiceid(NULL, 0xff); + else + nci = numa_cpuinfo_getby_sapiceid(NULL, sapic->Eid); + KASSERT(nci != NULL); + if (nci->nci_sapiceid == 0xff) + nci->nci_sapiceid = sapic->Eid; + if (nci->nci_sapicid == 0xff) + nci->nci_sapicid = sapic->Id; + if (nci->nci_acpiid == 0xff) + nci->nci_acpiid = sapic->ProcessorId; + break; } + return AE_OK; } @@ -376,31 +389,56 @@ mpacpi_config_ioapic(ACPI_SUBTABLE_HEADE { device_t parent = aux; struct apic_attach_args aaa; - ACPI_MADT_IO_APIC *p; + ACPI_MADT_IO_APIC *ioapic; + ACPI_MADT_IO_SAPIC *iosapic; int locs[IOAPICBUSCF_NLOCS]; - if (hdrp->Type == ACPI_MADT_TYPE_IO_APIC) { - p = (ACPI_MADT_IO_APIC *)hdrp; - aaa.apic_id = p->Id; - aaa.apic_address = p->Address; + switch (hdrp->Type) { + case ACPI_MADT_TYPE_IO_APIC: + ioapic = (ACPI_MADT_IO_APIC *)hdrp; + aaa.apic_id = ioapic->Id; + aaa.apic_address = ioapic->Address; aaa.apic_version = -1; aaa.flags = IOAPIC_VWIRE; - aaa.apic_vecbase = p->GlobalIrqBase; + aaa.apic_vecbase = ioapic->GlobalIrqBase; locs[IOAPICBUSCF_APID] = aaa.apic_id; config_found_sm_loc(parent, "ioapicbus", locs, &aaa, mpacpi_ioapicprint, config_stdsubmatch); + break; + + case ACPI_MADT_TYPE_IO_SAPIC: + aprint_normal_dev(parent, "configuring sioapic\n"); + iosapic = (ACPI_MADT_IO_SAPIC *)hdrp; + aaa.apic_id = iosapic->Id; + aaa.apic_address = iosapic->Address; + aaa.apic_version = -1; + aaa.flags = IOAPIC_VWIRE; + aaa.apic_vecbase = iosapic->GlobalIrqBase; + locs[IOAPICBUSCF_APID] = aaa.apic_id; +#ifdef notyet +
config_found_sm_loc(parent, "ioapicbus", locs, &aaa, + mpacpi_ioapicprint, config_stdsubmatch); +#else + aprint_normal_dev(parent, "implement support for io sapic\n"); +#endif + break; } return AE_OK; } int -mpacpi_scan_apics(device_t self, int *ncpup) +mpacpi_scan_apics(device_t self) { int rv = 0; + struct numa_info *node; if (acpi_madt_map() != AE_OK) return 0; + /* If faked or not, a numa node id 0 must always exist */ + node = numanode_getbyid(0); + KASSERT(node != NULL); + mpacpi_ncpu = mpacpi_nintsrc = mpacpi_nioapic = 0; acpi_madt_walk(mpacpi_count, self); @@ -408,6 +446,18 @@ mpacpi_scan_apics(device_t self, int *nc lapic_boot_init(mpacpi_lapic_base); #endif + if (numa_bootflags & NUMAF_FAKETOPOLOGY) { + struct numa_cpu_info *nci; + uint32_t i; + + for (i = 0; i < mpacpi_ncpu; i++) { + nci = numa_cpuinfo_alloc(node); + if (!nci) + panic("%s: can't allocate memory for %i cpu\n", + device_xname(self), i); + } + } + acpi_madt_walk(mpacpi_config_cpu, self); if (mpacpi_ncpu == 0) @@ -426,7 +476,6 @@ mpacpi_scan_apics(device_t self, int *nc #endif rv = 1; done: - *ncpup = mpacpi_ncpu; acpi_madt_unmap(); return rv; } Index: sys/arch/x86/x86/mpbios.c =================================================================== RCS file: /cvsroot/src/sys/arch/x86/x86/mpbios.c,v retrieving revision 1.49 diff -u -p -r1.49 mpbios.c --- sys/arch/x86/x86/mpbios.c 9 Nov 2008 15:34:14 -0000 1.49 +++ sys/arch/x86/x86/mpbios.c 10 Nov 2008 14:39:35 -0000 @@ -492,7 +492,7 @@ static struct mp_bus nmi_bus = { * nintrs */ void -mpbios_scan(device_t self, int *ncpup) +mpbios_scan(device_t self) { const uint8_t *position, *end; int count; @@ -691,16 +691,18 @@ mpbios_scan(device_t self, int *ncpup) mpbios_unmap (&mp_cfg_table_map); } mpbios_scanned = 1; - - *ncpup = mpbios_ncpu; } static void mpbios_cpu(const uint8_t *ent, device_t self) { const struct mpbios_proc *entry = (const struct mpbios_proc *)ent; - struct cpu_attach_args caa; - int locs[CPUBUSCF_NLOCS]; + bool fake = false; + 
struct numa_info *node; + struct numa_cpu_info *nci; + + if (numa_bootflags & NUMAF_FAKETOPOLOGY) + fake = true; /* XXX move this into the CPU attachment goo. */ /* check for usability */ @@ -709,18 +711,36 @@ mpbios_cpu(const uint8_t *ent, device_t mpbios_ncpu++; + /* If faked or not, a numa node id 0 must always exist */ + node = numanode_getbyid(0); + KASSERT(node != NULL); + + if (fake) { +#if NACPI > 0 + if (mpacpi_ncpu == 0) { +#endif + nci = numa_cpuinfo_alloc(node); + if (!nci) + panic("%s: can't allocate memory for %i cpu\n", + device_xname(self), mpbios_ncpu); +#if NACPI > 0 + } else { + nci = numa_cpuinfo_getby_apicid(NULL, entry->apic_id); + } +#endif + } else { + nci = numa_cpuinfo_getby_apicid(NULL, entry->apic_id); + } + + KASSERT(nci != NULL); + /* check for BSP flag */ if (entry->cpu_flags & PROCENTRY_FLAG_BP) - caa.cpu_role = CPU_ROLE_BP; - else - caa.cpu_role = CPU_ROLE_AP; - - caa.cpu_number = entry->apic_id; - caa.cpu_func = &mp_cpu_funcs; - locs[CPUBUSCF_APID] = caa.cpu_number; + if (cpu_bsp_number == 0xff) + cpu_bsp_number = entry->apic_id; - config_found_sm_loc(self, "cpubus", locs, &caa, mp_cpuprint, - config_stdsubmatch); + if (nci->nci_apicid == 0xff) + nci->nci_apicid = entry->apic_id; } static void Index: sys/arch/x86/x86/x86_numa.c =================================================================== RCS file: sys/arch/x86/x86/x86_numa.c diff -N sys/arch/x86/x86/x86_numa.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/arch/x86/x86/x86_numa.c 10 Nov 2008 14:39:35 -0000 @@ -0,0 +1,549 @@ +/* $NetBSD: $ */ +/* + * Copyright (c) 2008 Christoph Egger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: $"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include /* for ptoa */ + +uint32_t cpu_bsp_number = 0xff; +struct numa_info *numa_info_primary = NULL; +struct numa_info *numa_info_list = NULL; + +#ifdef NUMA_ACPI_DUMP +static void +acpi_srat_dump(ACPI_TABLE_SRAT *srat) +{ + uint32_t srat_pos; + + printf("dumping SRAT table:\n"); + printf("Header: length: 0x%x, revision: 0x%x\n", + srat->Header.Length, srat->Header.Revision); + printf("TableRevision: 0x%x\n", srat->TableRevision); + + /* Content starts right after the header */ + srat_pos = sizeof(ACPI_TABLE_SRAT); + + while (srat_pos < srat->Header.Length) { + ACPI_SUBTABLE_HEADER *subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); + ACPI_SRAT_CPU_AFFINITY *srat_cpu; + ACPI_SRAT_MEM_AFFINITY *srat_mem; + + srat_pos += subtable->Length; + + switch (subtable->Type) { + case ACPI_SRAT_TYPE_CPU_AFFINITY: + srat_cpu = (ACPI_SRAT_CPU_AFFINITY 
*)subtable; + printf("SRAT subtable cpu, length: 0x%x\n", + subtable->Length); + printf("SRAT CPU: Node 0x%x\n", + (srat_cpu->ProximityDomainHi[2] << 24) | + (srat_cpu->ProximityDomainHi[1] << 16) | + (srat_cpu->ProximityDomainHi[0] << 8) | + (srat_cpu->ProximityDomainLo)); + printf("SRAT CPU: ProximityDomainLo %x\n", + srat_cpu->ProximityDomainLo); + printf("SRAT CPU: ApicId %x\n", + srat_cpu->ApicId); + printf("SRAT CPU: Flags %x\n", + srat_cpu->Flags); + printf("SRAT CPU: LocalSapicEid %x\n", + srat_cpu->LocalSapicEid); + printf("SRAT CPU: ProximityDomainHi %x, %x, %x\n", + srat_cpu->ProximityDomainHi[0], + srat_cpu->ProximityDomainHi[1], + srat_cpu->ProximityDomainHi[2]); + break; + case ACPI_SRAT_TYPE_MEMORY_AFFINITY: + srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; + printf("SRAT subtable mem, length: 0x%x\n", + subtable->Length); + printf("SRAT MEM: Node 0x%x\n", + srat_mem->ProximityDomain); + printf("SRAT MEM: BaseAddress 0x%"PRIx64"\n", + srat_mem->BaseAddress); + printf("SRAT MEM: Length 0x%"PRIx64"\n", + srat_mem->Length); + printf("SRAT MEM: MemoryType %x\n", + srat_mem->MemoryType); + printf("SRAT MEM: Flags %x\n", + srat_mem->Flags); + break; + case ACPI_SRAT_TYPE_RESERVED: + printf("SRAT subtable reserved, length: 0x%x\n", + subtable->Length); + break; + } + } + + return; +} +#endif /* NUMA_ACPI_DUMP */ + +static struct numa_info * +numanode_alloc(uint32_t nodeid) +{ + struct numa_info *node, *tmp; + + node = kmem_zalloc(sizeof(*node), KM_NOSLEEP); + if (!node) + return node; + + node->ni_self = node; + node->ni_nodeid = nodeid; + node->ni_next = NULL; + + if (numa_info_list == NULL) { + numa_info_list = node; + } else { + tmp = numa_info_list; + while (tmp->ni_next) + tmp = tmp->ni_next; + + tmp->ni_next = node; + } + + return node; +} + +struct numa_info * +numanode_getbyid_alloc(uint32_t nodeid, bool alloc) +{ + NUMA_INFO_ITERATOR nii; + struct numa_info *ni; + + for (NUMA_INFO_FOREACH(nii, ni)) { + if (ni->ni_nodeid == nodeid) + return ni; + } + 
+ if (!alloc) + return NULL; + + return numanode_alloc(nodeid); +} + + +struct numa_cpu_info * +numa_cpuinfo_alloc(struct numa_info *ni) +{ + struct numa_cpu_info *nci, *tmp; + + nci = kmem_zalloc(sizeof(*nci), KM_NOSLEEP); + if (!nci) + return NULL; + + nci->nci_numainfo = ni; + nci->nci_acpiid = nci->nci_apicid = 0xff; + nci->nci_sapicid = nci->nci_sapiceid = 0xff; + if (ni->ni_cpuinfo == NULL) { + ni->ni_cpuinfo = nci; + ni->ni_ncpus = 1; + } else { + tmp = ni->ni_cpuinfo; + while (tmp->nci_next) + tmp = tmp->nci_next; + + tmp->nci_next = nci; + ni->ni_ncpus++; + } + + return nci; +} + +struct numa_cpu_info * +numa_cpuinfo_getby_apicid(struct numa_info *ni, uint8_t apicid) +{ + NUMA_INFO_ITERATOR nii; + struct numa_cpu_info *nci; + NUMACPU_ITERATOR ncii; + + if (ni == NULL) { + for (NUMA_INFO_FOREACH(nii, ni)) { + for (NUMACPU_FOREACH(ni, ncii, nci)) { + if (nci->nci_apicid == apicid) + return nci; + } + } + return NULL; + } + + + for (NUMACPU_FOREACH(ni, ncii, nci)) { + if (nci->nci_apicid == apicid) + return nci; + } + + return NULL; +} + +struct numa_cpu_info * +numa_cpuinfo_getby_sapiceid(struct numa_info *ni, uint8_t sapiceid) +{ + NUMA_INFO_ITERATOR nii; + struct numa_cpu_info *nci; + NUMACPU_ITERATOR ncii; + + if (ni == NULL) { + for (NUMA_INFO_FOREACH(nii, ni)) { + for (NUMACPU_FOREACH(ni, ncii, nci)) { + if (nci->nci_sapiceid == sapiceid) + return nci; + } + } + return NULL; + } + + + for (NUMACPU_FOREACH(ni, ncii, nci)) { + if (nci->nci_sapiceid == sapiceid) + return nci; + } + + return NULL; +} + +struct numa_mem_info * +numa_meminfo_alloc(struct numa_info *ni) +{ + struct numa_mem_info *nmi, *tmp; + + nmi = kmem_zalloc(sizeof(*nmi), KM_NOSLEEP); + if (!nmi) + return NULL; + + nmi->nmi_numainfo = ni; + if (ni->ni_meminfo == NULL) { + ni->ni_meminfo = nmi; + ni->ni_nmemory = 1; + } else { + tmp = ni->ni_meminfo; + while (tmp->nmi_next) + tmp = tmp->nmi_next; + + tmp->nmi_next = nmi; + ni->ni_nmemory++; + } + + return nmi; +} + + +static bool 
+numa_acpi_srat_parse(ACPI_TABLE_SRAT *srat) +{ + ACPI_SRAT_CPU_AFFINITY *srat_cpu; + ACPI_SRAT_MEM_AFFINITY *srat_mem; + ACPI_SUBTABLE_HEADER *subtable; + struct numa_info *node = NULL; + struct numa_cpu_info *nci_node = NULL; + struct numa_mem_info *nmi_node = NULL; + uint32_t numaid; + uint32_t srat_pos; + + KASSERT(srat != NULL); + +#ifdef NUMA_ACPI_DUMP + acpi_srat_dump(srat); +#endif + + /* Content starts right after the header */ + srat_pos = sizeof(ACPI_TABLE_SRAT); + + while (srat_pos < srat->Header.Length) { + subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); + srat_pos += subtable->Length; + + switch (subtable->Type) { + case ACPI_SRAT_TYPE_CPU_AFFINITY: + srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable; + numaid = (srat_cpu->ProximityDomainHi[2] << 24) | + (srat_cpu->ProximityDomainHi[1] << 16) | + (srat_cpu->ProximityDomainHi[0] << 8) | + (srat_cpu->ProximityDomainLo); + + node = numanode_getbyid_alloc(numaid, true); + if (!node) + panic("acpi_srat_parse: no memory for NUMA (cpu node)\n"); + nci_node = numa_cpuinfo_alloc(node); + if (!nci_node) + panic("acpi_srat_parse: no memory for NUMA (cpu node2)\n"); + + nci_node->nci_apicid = srat_cpu->ApicId; + nci_node->nci_sapiceid = srat_cpu->LocalSapicEid; + + break; + case ACPI_SRAT_TYPE_MEMORY_AFFINITY: + srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; + + numaid = srat_mem->ProximityDomain; + node = numanode_getbyid_alloc(numaid, true); + if (!node) + panic("acpi_srat_parse: no memory for NUMA (memory node)\n"); + + nmi_node = numa_meminfo_alloc(node); + if (!nmi_node) + panic("acpi_srat_parse: no memory for NUMA (memory node2)\n"); + + nmi_node->nmi_baseaddress = srat_mem->BaseAddress; + nmi_node->nmi_length = srat_mem->Length; + + switch (srat_mem->MemoryType) { + case ACPI_ADDRESS_RANGE_MEMORY: + nmi_node->nmi_md_type = NUMAMEM_TYPE_NORMAL; + break; + case ACPI_ADDRESS_RANGE_RESERVED: + nmi_node->nmi_md_type = NUMAMEM_TYPE_RESERVED; + break; + case ACPI_ADDRESS_RANGE_ACPI: + 
nmi_node->nmi_md_type = NUMAMEM_TYPE_ACPI; + break; + case ACPI_ADDRESS_RANGE_NVS: + nmi_node->nmi_md_type = NUMAMEM_TYPE_NVS; + break; + } + + nmi_node->nmi_mi_flags = NUMAMEM_FLAG_NORMAL; + if (srat_mem->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) + nmi_node->nmi_mi_flags |= NUMAMEM_FLAG_HOTPLUG; + if (srat_mem->Flags & ACPI_SRAT_MEM_NON_VOLATILE) + nmi_node->nmi_mi_flags |= NUMAMEM_FLAG_NONVOLATILE; + break; + case ACPI_SRAT_TYPE_RESERVED: + printf("SRAT subtable reserved, length: 0x%x\n", + subtable->Length); + break; + } + } + + return true; +} + +#ifdef NUMA_ACPI_DUMP +static void +acpi_slit_dump(ACPI_TABLE_SLIT *slit) +{ + uint32_t i; + + printf("dumping SLIT table:\n"); + printf("Header: length: 0x%x, revision: 0x%x\n", + slit->Header.Length, slit->Header.Revision); + printf("LocalityCount: %"PRIx64"\n", slit->LocalityCount); + + i = 0; + while (i < (slit->LocalityCount * slit->LocalityCount)) { + printf("%2u ", slit->Entry[i]); + i++; + if ((i % slit->LocalityCount) == 0) + printf("\n"); + } + + return; +} +#endif /* NUMA_ACPI_DUMP */ + +static bool +numa_acpi_slit_parse(ACPI_TABLE_SLIT *slit) +{ + + KASSERT(slit != NULL); + +#ifdef NUMA_ACPI_DUMP + acpi_slit_dump(slit); +#endif + + return false; +} + +static ACPI_TABLE_HEADER * +numa_acpi_find_table(ACPI_CONST_STRING signature) +{ + ACPI_TABLE_HEADER *table; + ACPI_STATUS rv; + + rv = AcpiGetTable(signature, 1, (ACPI_TABLE_HEADER **)&table); + if (ACPI_FAILURE(rv)) + return NULL; + + /* Check if header is valid */ + if (table == NULL) + return NULL; + + if (table->Length == 0xffffffff) + return NULL; + + return table; +} + +extern int acpi_present; + +uint32_t +numa_get_topology(void) +{ + bool srat_present, srat_parsed; + ACPI_TABLE_SRAT *srat; + uint32_t flag = 0; + + if (!acpi_present) { + flag |= NUMAF_FAKETOPOLOGY; + flag |= NUMAF_SCANTOPOLOGY; + goto out; + } + + /* find SRAT table */ + srat = (ACPI_TABLE_SRAT *)numa_acpi_find_table(ACPI_SIG_SRAT); + srat_present = (srat) ? 
true : false; + + printf("NUMA: SRAT table %s\n", srat ? "found" : "not found"); + + if (!srat_present) { + flag |= NUMAF_FAKETOPOLOGY; + flag |= NUMAF_SCANTOPOLOGY; + goto out; + } + + srat_parsed = numa_acpi_srat_parse(srat); + if (!srat_parsed) { + printf("NUMA: couldn't parse SRAT table\n"); + flag |= NUMAF_FAKETOPOLOGY; + flag |= NUMAF_SCANTOPOLOGY; + goto out; + } + +out: + return flag; +} + +uint32_t +numa_get_affinity(void) +{ + bool slit_present, slit_parsed; + ACPI_TABLE_SLIT *slit; + uint32_t flag = 0; + + if (!acpi_present) { + flag |= NUMAF_PROBEAFFINITY; + goto out; + } + + /* find SLIT table */ + slit = (ACPI_TABLE_SLIT *)numa_acpi_find_table(ACPI_SIG_SLIT); + slit_present = (slit) ? true : false; + + printf("NUMA: SLIT table %s\n", slit ? "found" : "not found"); + + if (!slit_present) { + flag |= NUMAF_PROBEAFFINITY; + goto out; + } + + slit_parsed = numa_acpi_slit_parse(slit); + if (!slit_parsed) { + printf("NUMA: couldn't parse SLIT table\n"); + flag |= NUMAF_PROBEAFFINITY; + goto out; + } + +out: + return flag; +} + +#define NUMAMEM_COUNT 2 +struct numa_mem_info meminfo[] = { + { NULL, NULL, 0x0, 0xa0000, /* 0 - 640 KB */ + NUMAMEM_TYPE_PHYSICAL, NUMAMEM_TYPE_NORMAL, + NUMAMEM_FLAG_NORMAL, 0 }, + { NULL, NULL, 0x100000, 0xffffffffffefffffULL, /* 1 MB - total memory */ + NUMAMEM_TYPE_PHYSICAL, NUMAMEM_TYPE_NORMAL, + NUMAMEM_FLAG_NORMAL, 0 }, +}; + +int +numa_fakememory(struct numa_info *node) +{ + struct numa_mem_info *nmi; + uint64_t totalmemory; + uint64_t start, end; + uint32_t i; + + KASSERT(node != NULL); + KASSERT(numa_bootflags & NUMAF_FAKETOPOLOGY); + + totalmemory = ptoa(physmem); + + for (i = 0; i < NUMAMEM_COUNT; i++) { + start = meminfo[i].nmi_baseaddress; + end = start + meminfo[i].nmi_length; + if (totalmemory < end) + end = totalmemory; + + nmi = numa_meminfo_alloc(node); + if (!nmi) + panic("NUMA: can't store memory info\n"); + + nmi->nmi_baseaddress = start; + nmi->nmi_length = end; + nmi->nmi_mi_type = meminfo[i].nmi_mi_type; + 
nmi->nmi_md_type = meminfo[i].nmi_md_type; + nmi->nmi_mi_flags = meminfo[i].nmi_mi_type; + nmi->nmi_md_flags = meminfo[i].nmi_md_flags; + } + + printf("NUMA: faked memory has no PCI hole. Take care!\n"); + + return 0; +} + +int +md_numa_init(device_t self) +{ + NUMA_INFO_ITERATOR nii; + struct numa_info *ni; + + if (cpu_bsp_number == 0xff) + cpu_bsp_number = lapic_cpu_number(); + + for (NUMA_INFO_FOREACH(nii, ni)) { + config_found_ia(self, "numabus", NULL, NULL); + } + + return 0; +} Index: sys/arch/x86/x86/x86_numa_dev.c =================================================================== RCS file: sys/arch/x86/x86/x86_numa_dev.c diff -N sys/arch/x86/x86/x86_numa_dev.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/arch/x86/x86/x86_numa_dev.c 10 Nov 2008 14:39:35 -0000 @@ -0,0 +1,159 @@ +/* $NetBSD: $ */ +/* + * Copyright (c) 2008 Christoph Egger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: $"); + +#include +#include +#include +#include +#include + +#include + +static int numa_match(device_t, cfdata_t, void *); +static void numa_attach(device_t, device_t, void *); +static int numabus_print(void *, const char *); + +CFATTACH_DECL_NEW(numa, 0, numa_match, numa_attach, NULL, NULL); + + +/* + * Probe for the numa device always succeeds. + */ +static int +numa_match(device_t parent, cfdata_t match, void *aux) +{ + return 1; +} + +/* + * Attach the numa. + */ +static void +numa_attach(device_t parent, device_t self, void *aux) +{ + struct numa_info *node; + uint32_t nodeid; + NUMACPU_ITERATOR ncii; + NUMAMEM_ITERATOR nmii; + struct numa_cpu_info *nci; + struct numa_mem_info *nmi; + + aprint_naive("\n"); + aprint_normal("\n"); + + nodeid = device_unit(self); + node = numanode_getbyid(nodeid); + + KASSERT(node != NULL); + + node->ni_dev = self; + + mi_numa_attach(node); + + /* A node w/o cpus is valid. Large NUMA machines + * have (dedicated) cpu-less nodes. 
+ */ + for (NUMACPU_FOREACH(node, ncii, nci)) { + struct cpu_attach_args caa; + + memset(&caa, 0, sizeof(caa)); + caa.cpu_number = nci->nci_apicid; + caa.cpu_func = &mp_cpu_funcs; + caa.cpu_nci = nci; + if (caa.cpu_number == cpu_bsp_number) { + caa.cpu_role = CPU_ROLE_SP; + if (((boothowto & RB_MD1) != RB_MD1) + && (node->ni_ncpus > 1)) + caa.cpu_role = CPU_ROLE_BP; + + KASSERT(numa_info_primary == NULL); + numa_info_primary = node; + } else { + caa.cpu_role = CPU_ROLE_AP; + } + + config_found_ia(self, "cpubus", &caa, numabus_print); + } + + for (NUMAMEM_FOREACH(node, nmii, nmi)) { + aprint_normal_dev(self, + "memory: 0x%"PRIx64" - 0x%"PRIx64 + " (0x%"PRIx64", %s, %s, %s)\n", + nmi->nmi_baseaddress, + nmi->nmi_baseaddress + nmi->nmi_length, + nmi->nmi_length, + (nmi->nmi_mi_type == NUMAMEM_TYPE_VIRTUAL) ? + "virtual" : "physical", + (nmi->nmi_mi_flags & NUMAMEM_FLAG_HOTPLUG) ? + "hotplug" : (nmi->nmi_mi_flags & NUMAMEM_FLAG_NONVOLATILE) ? + "nonvolatile" : "raw", + (nmi->nmi_md_type == NUMAMEM_TYPE_RESERVED) ? + "reserved" : (nmi->nmi_md_type == NUMAMEM_TYPE_ACPI) ? + "acpi" : (nmi->nmi_md_type == NUMAMEM_TYPE_NVS) ? 
+ "nvs" : "raw" + ); + } + + if (!pmf_device_register(self, NULL, NULL)) + aprint_error_dev(self, "couldn't establish power handler\n"); + +} + +static int +numabus_print(void *aux, const char *pnp) +{ + struct cpu_attach_args *caa = aux; + + if (pnp) + aprint_normal("cpuX at %s", pnp); + aprint_normal(" apic %u", caa->cpu_nci->nci_apicid); + + aprint_debug(" acpi %u sapicid %u sapiceid %u", + caa->cpu_nci->nci_acpiid, + caa->cpu_nci->nci_sapicid, + caa->cpu_nci->nci_sapiceid); + + switch (caa->cpu_role) { + case CPU_ROLE_BP: + aprint_normal(" (BP)"); + break; + case CPU_ROLE_SP: + aprint_normal(" (SP)"); + break; + case CPU_ROLE_AP: + aprint_normal(" (AP)"); + break; + default: + aprint_normal(" (unknown)"); + break; + } + + return UNCONF; +} Index: sys/arch/xen/conf/files.xen =================================================================== RCS file: /cvsroot/src/sys/arch/xen/conf/files.xen,v retrieving revision 1.88 diff -u -p -r1.88 files.xen --- sys/arch/xen/conf/files.xen 3 Aug 2008 19:32:03 -0000 1.88 +++ sys/arch/xen/conf/files.xen 10 Nov 2008 14:39:35 -0000 @@ -158,15 +158,24 @@ define vesabiosbus {} define hypervisorbus {} define xendevbus {} define ipmibus {} +device numabus { } # # System bus types # -device mainbus: cpubus, ioapicbus, hypervisorbus, bios32, ipmibus +device mainbus: ioapicbus, hypervisorbus, bios32, ipmibus, numabus attach mainbus at root file arch/xen/x86/mainbus.c mainbus +# +# NUMA +# +device numa: cpubus +attach numa at numabus +file arch/x86/x86/x86_numa.c numa +file arch/x86/x86/x86_numa_dev.c numa + # Xen hypervisor device hypervisor { [apid = -1]}: isabus, pcibus, sysmon_power, xendevbus, acpibus attach hypervisor at hypervisorbus Index: sys/arch/xen/x86/mainbus.c =================================================================== RCS file: /cvsroot/src/sys/arch/xen/x86/mainbus.c,v retrieving revision 1.6 diff -u -p -r1.6 mainbus.c --- sys/arch/xen/x86/mainbus.c 9 Nov 2008 14:24:14 -0000 1.6 +++ sys/arch/xen/x86/mainbus.c 10 
Nov 2008 14:39:35 -0000 @@ -51,7 +51,7 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v #include "ipmi.h" -#include +#include #include #include @@ -91,6 +91,7 @@ int mp_verbose = 0; #endif /* defined(MPBIOS) || NACPI > 0 */ #endif /* defined(XEN3) && NPCI > 0 */ +uint32_t numa_bootflags = 0; int mainbus_match(device_t, cfdata_t, void *); void mainbus_attach(device_t, device_t, void *); @@ -102,7 +103,6 @@ int mainbus_print(void *, const char *); union mainbus_attach_args { const char *mba_busname; /* first elem of all */ - struct cpu_attach_args mba_caa; #if NHYPERVISOR > 0 struct hypervisor_attach_args mba_haa; #endif @@ -129,12 +129,14 @@ mainbus_attach(device_t parent, device_t { union mainbus_attach_args mba; #if defined(DOM0OPS) && defined(XEN3) - int numcpus = 0; + bool numa_faketopology = false; #ifdef MPBIOS + struct numa_info *node = NULL; int mpbios_present = 0; #endif #endif /* defined(DOM0OPS) && defined(XEN3) */ + aprint_naive("\n"); aprint_normal("\n"); @@ -165,25 +167,38 @@ mainbus_attach(device_t parent, device_t #endif /* PCI_BUS_FIXUP */ #if NACPI > 0 acpi_present = acpi_probe(); +#endif + numa_bootflags = mi_numa_init(); + + if (numa_bootflags & NUMAF_FAKETOPOLOGY) + numa_faketopology = true; + + if (numa_faketopology) { + aprint_normal_dev(self, "fake a one node NUMA system\n"); + + /* Fake a one node NUMA system */ + node = numanode_getbyid_alloc(0, true); + if (!node) + panic("NUMA: can't allocate memory for NUMA node\n"); + } + +#if NACPI > 0 if (acpi_present) - mpacpi_active = mpacpi_scan_apics(self, &numcpus); + mpacpi_active = mpacpi_scan_apics(self); if (!mpacpi_active) #endif { #ifdef MPBIOS if (mpbios_present) - mpbios_scan(self, &numcpus); - else + mpbios_scan(self); #endif - if (numcpus == 0) { - memset(&mba.mba_caa, 0, sizeof(mba.mba_caa)); - mba.mba_caa.cpu_number = 0; - mba.mba_caa.cpu_role = CPU_ROLE_SP; - mba.mba_caa.cpu_func = 0; - config_found_ia(self, "cpubus", - &mba.mba_caa, mainbus_print); - } } + + if (numa_faketopology) + 
numa_fakememory(node); + + md_numa_init(self); + #if NIOAPIC > 0 ioapic_enable(); #endif Index: sys/conf/files =================================================================== RCS file: /cvsroot/src/sys/conf/files,v retrieving revision 1.924 diff -u -p -r1.924 files --- sys/conf/files 15 Oct 2008 06:51:20 -0000 1.924 +++ sys/conf/files 10 Nov 2008 14:39:39 -0000 @@ -1393,6 +1393,7 @@ file kern/kern_malloc_stdtype.c file kern/kern_malloc_debug.c malloc_debug file kern/kern_module.c file kern/kern_mutex.c +file kern/kern_numa.c numa file kern/kern_fileassoc.c fileassoc file kern/kern_ntptime.c file kern/kern_pax.c pax_mprotect | pax_segvguard Index: sys/kern/kern_numa.c =================================================================== RCS file: sys/kern/kern_numa.c diff -N sys/kern/kern_numa.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/kern/kern_numa.c 10 Nov 2008 14:39:47 -0000 @@ -0,0 +1,136 @@ +/* $NetBSD: $ */ + +/* + * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: $"); + +#include +#include +#include +#include +#include +#include +#include +#include + +void numactlattach(int); + +dev_type_ioctl(numactl_ioctl); + +const struct cdevsw numactl_cdevsw = { + nullopen, nullclose, nullread, nullwrite, numactl_ioctl, + nullstop, notty, nopoll, nommap, nokqfilter, + D_OTHER | D_MPSAFE +}; + +kmutex_t numa_lock; +int nnuma; +struct numaqueue numa_queue = CIRCLEQ_HEAD_INITIALIZER(numa_queue); + +static struct numa_info *numa_infos[MAX_NUMA_NODES]; + +uint32_t +mi_numa_init(void) +{ + uint32_t flag = 0; + + /* Try to get the topology from the firmware first. + * If that fails, we fake a NUMA system with one + * node with all CPUs and all memory on it. + */ + flag |= numa_get_topology(); + + /* Try to get the cpu/memory affinity from the firmware first. + * If that fails, we must attach NUMA devices (i.e. CPUs) + * first and then do some probing/measuring. 
+ */ + flag |= numa_get_affinity(); + + return flag; +} + +int +mi_numa_attach(struct numa_info *ni) +{ + struct numa_data *nd; + + nd = &ni->ni_data; + nd->ni_index = nnuma; + + numa_infos[numa_index(ni)] = ni; + CIRCLEQ_INSERT_TAIL(&numa_queue, ni, ni_data.numa_qchain); +#if 0 + TAILQ_INIT(&ni->ni_data.numa_ld_locks); + __numa_simple_lock_init(&ni->ni_data.numa_ld_lock); +#endif + +#if notyet + sched_numaattach(ni); +#endif + + nnuma++; + + return 0; +} + +void +numactlattach(int dummy) +{ + +} + +int +numactl_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l) +{ +#if 0 + NUMA_INFO_ITERATOR nii; + struct numa_info *ni; + int error, i; + u_int id; + + error = 0; +#endif + + int error = 0; + + return error; +} + +struct numa_info * +numa_lookup(uint32_t idx) +{ + struct numa_info *ni = numa_infos[idx]; + + KASSERT(idx < __arraycount(numa_infos)); + KASSERT(ni == NULL || numa_index(ni) == idx); + + return ni; +} Index: sys/sys/numa.h =================================================================== RCS file: sys/sys/numa.h diff -N sys/sys/numa.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/numa.h 10 Nov 2008 14:39:48 -0000 @@ -0,0 +1,81 @@ +/* $NetBSD: $ */ + +/* + * Copyright (c) 2008 Christoph Egger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _SYS_NUMA_H_ +#define _SYS_NUMA_H_ + +#include + + +#ifndef NUMA_INFO_ITERATOR +#define NUMA_INFO_ITERATOR int +#define NUMA_INFO_FOREACH(nii, ni) \ + (void)nii, ni = curnode(); ni != NULL; ni = NULL +#endif + +#ifndef NUMAINFO_IS_PRIMARY +#define NUMAINFO_IS_PRIMARY(ni) ((void)ni, 1) +#endif + +/* MI NUMA flags */ +#define NUMAF_FAKETOPOLOGY 0x1 /* fake a NUMA topology */ +#define NUMAF_SCANTOPOLOGY 0x2 /* scan for cpu/memory devices + * and build topology based on + * the findings. 
+ */ +#define NUMAF_PROBEAFFINITY 0x4 /* probe affinity between nodes */ + +/* MI NUMA cpu types */ + +/* MI NUMA cpu flags */ +#define NUMACPU_FLAG_PRIMARY 0x00000001 + +/* MI NUMA memory types */ +#define NUMAMEM_TYPE_PHYSICAL 0x00000000 +#define NUMAMEM_TYPE_VIRTUAL 0x00000001 + +/* MI NUMA memory flags */ +#define NUMAMEM_FLAG_NORMAL 0x00000000 +#define NUMAMEM_FLAG_HOTPLUG 0x00000001 +#define NUMAMEM_FLAG_NONVOLATILE 0x00000002 + +struct numa_info *numa_lookup(uint32_t); + +CIRCLEQ_HEAD(numaqueue, numa_info); + +extern kmutex_t numa_lock; +extern uint32_t maxnuma; +extern struct numaqueue numa_queue; + +static inline uint32_t +numa_index(struct numa_info *ni) +{ + return ni->ni_data.ni_index; +} + +#endif /* !_SYS_NUMA_H_ */ Index: sys/sys/numa_data.h =================================================================== RCS file: sys/sys/numa_data.h diff -N sys/sys/numa_data.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/numa_data.h 10 Nov 2008 14:39:48 -0000 @@ -0,0 +1,84 @@ +/* $NetBSD: $ */ + +/* + * Copyright (c) 2008 Christoph Egger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_NUMA_DATA_H_ +#define _SYS_NUMA_DATA_H_ + +#include + +/* + * MI per numa-node data + * + * this structure is intended to be included in MD numa_info structure. + * struct numa_info { + * struct numa_data ni_data; + * } + * + * note that numa_data is not expected to contain much data, + * as numa_info is size-limited on most ports. + */ + +struct numa_data { + /* + * The first section is likely to be touched by other NUMAs + */ + CIRCLEQ_ENTRY(numa_info) numa_qchain; /* circleq of all NUMAs */ + + /* + * This section is mostly NUMA-private. + */ + uint32_t ni_index; /* NUMA node index */ +}; + +/* + * MI per cpu numa-node data + * + * this structure is intended to be included in MD numa_cpu_info structure. + * struct numa_cpu_info { + * struct numa_cpu_data nci_data; + * } + * + * note that numa_cpu_data is not expected to contain much data, + * as numa_cpu_info is size-limited on most ports. + */ + +struct numa_cpu_data { + uint32_t dummy; +}; + +int mi_numa_attach(struct numa_info *); + +#define NUMAF_FAKETOPOLOGY 0x1 /* fake a NUMA topology */ +#define NUMAF_SCANTOPOLGY 0x2 /* scan for cpu/memory devices + * and build topology based on + * the findings. 
+ */ +#define NUMAF_PROBEAFFINITY 0x4 /* probe affinity between nodes */ +uint32_t mi_numa_init(void); + +#endif /* _SYS_NUMA_DATA_H_ */ Index: sys/sys/param.h =================================================================== RCS file: /cvsroot/src/sys/sys/param.h,v retrieving revision 1.331 diff -u -p -r1.331 param.h --- sys/sys/param.h 31 Oct 2008 00:12:51 -0000 1.331 +++ sys/sys/param.h 10 Nov 2008 14:39:48 -0000 @@ -174,6 +174,9 @@ #ifndef MAXCPUS #define MAXCPUS 32 #endif +#ifndef MAX_NUMA_NODES +#define MAX_NUMA_NODES 32 +#endif #ifndef MAX_LWP_PER_PROC #define MAX_LWP_PER_PROC 8000 #endif