svn commit: r297748 - in head/sys: conf dev/acpica kern vm x86/acpica

John Baldwin jhb at FreeBSD.org
Sat Apr 9 13:58:06 UTC 2016


Author: jhb
Date: Sat Apr  9 13:58:04 2016
New Revision: 297748
URL: https://svnweb.freebsd.org/changeset/base/297748

Log:
  Add more fine-grained kernel options for NUMA support.
  
  VM_NUMA_ALLOC is used to enable use of domain-aware memory allocation in
  the virtual memory system.  DEVICE_NUMA is used to enable affinity
  reporting for devices via interfaces such as bus_get_domain().
  
  MAXMEMDOM must still be set to a value greater than 1 for any NUMA support
  to be effective.  Note that 'cpuset -gd' always works if MAXMEMDOM is
  greater than 1 and the system supports NUMA.
  
  Reviewed by:	kib
  Differential Revision:	https://reviews.freebsd.org/D5782

Modified:
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/dev/acpica/acpi.c
  head/sys/dev/acpica/acpivar.h
  head/sys/kern/kern_cpuset.c
  head/sys/vm/vm_domain.c
  head/sys/vm/vm_pageout.c
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h
  head/sys/x86/acpica/srat.c

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/conf/NOTES	Sat Apr  9 13:58:04 2016	(r297748)
@@ -229,7 +229,15 @@ options 	MAXCPU=32
 
 # MAXMEMDOM defines the maximum number of memory domains that can boot in the
 # system.  A default value should already be defined by every architecture.
-options 	MAXMEMDOM=1
+options 	MAXMEMDOM=2
+
+# VM_NUMA_ALLOC enables use of memory domain-aware allocation in the VM
+# system.
+options 	VM_NUMA_ALLOC
+
+# DEVICE_NUMA enables reporting of domain affinity of I/O devices via
+# bus_get_domain(), etc.
+options 	DEVICE_NUMA
 
 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin
 # if the thread that currently owns the mutex is executing on another

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/conf/options	Sat Apr  9 13:58:04 2016	(r297748)
@@ -90,6 +90,7 @@ COMPAT_LINUXKPI	opt_compat.h
 COMPILING_LINT	opt_global.h
 CY_PCI_FASTINTR
 DEADLKRES	opt_watchdog.h
+DEVICE_NUMA
 EXT_RESOURCES	opt_global.h
 DIRECTIO
 FILEMON		opt_dontuse.h
@@ -603,6 +604,7 @@ VM_KMEM_SIZE		opt_vm.h
 VM_KMEM_SIZE_SCALE	opt_vm.h
 VM_KMEM_SIZE_MAX	opt_vm.h
 VM_NRESERVLEVEL		opt_vm.h
+VM_NUMA_ALLOC		opt_vm.h
 VM_LEVEL_0_ORDER	opt_vm.h
 NO_SWAPPING		opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h

Modified: head/sys/dev/acpica/acpi.c
==============================================================================
--- head/sys/dev/acpica/acpi.c	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/dev/acpica/acpi.c	Sat Apr  9 13:58:04 2016	(r297748)
@@ -31,6 +31,8 @@
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
+#include "opt_device_numa.h"
+
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
@@ -1083,7 +1085,7 @@ acpi_hint_device_unit(device_t acdev, de
 int
 acpi_parse_pxm(device_t dev, int *domain)
 {
-#if MAXMEMDOM > 1
+#ifdef DEVICE_NUMA
 	ACPI_HANDLE h;
 	int d, pxm;
 

Modified: head/sys/dev/acpica/acpivar.h
==============================================================================
--- head/sys/dev/acpica/acpivar.h	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/dev/acpica/acpivar.h	Sat Apr  9 13:58:04 2016	(r297748)
@@ -502,9 +502,7 @@ SYSCTL_DECL(_debug_acpi);
  *
  * Returns the VM domain ID if found, or -1 if not found / invalid.
  */
-#if MAXMEMDOM > 1
 extern	int acpi_map_pxm_to_vm_domainid(int pxm);
-#endif
 extern	int acpi_get_domain(device_t dev, device_t child, int *domain);
 extern	int acpi_parse_pxm(device_t dev, int *domain);
 

Modified: head/sys/kern/kern_cpuset.c
==============================================================================
--- head/sys/kern/kern_cpuset.c	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/kern/kern_cpuset.c	Sat Apr  9 13:58:04 2016	(r297748)
@@ -831,7 +831,7 @@ struct cpuset *
 cpuset_thread0(void)
 {
 	struct cpuset *set;
-	int error;
+	int error, i;
 
 	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
@@ -863,9 +863,15 @@ cpuset_thread0(void)
 	 */
 	cpuset_unr = new_unrhdr(2, INT_MAX, NULL);
 
-	/* MD Code is responsible for initializing sets if vm_ndomains > 1. */
-	if (vm_ndomains == 1)
-		CPU_COPY(&all_cpus, &cpuset_domain[0]);
+	/*
+	 * If MD code has not initialized per-domain cpusets, place all
+	 * CPUs in domain 0.
+	 */
+	for (i = 0; i < MAXMEMDOM; i++)
+		if (!CPU_EMPTY(&cpuset_domain[i]))
+			goto domains_set;
+	CPU_COPY(&all_cpus, &cpuset_domain[0]);
+domains_set:
 
 	return (set);
 }
@@ -1118,7 +1124,7 @@ sys_cpuset_getaffinity(struct thread *td
 			error = intr_getaffinity(uap->id, mask);
 			break;
 		case CPU_WHICH_DOMAIN:
-			if (uap->id < 0 || uap->id >= vm_ndomains)
+			if (uap->id < 0 || uap->id >= MAXMEMDOM)
 				error = ESRCH;
 			else
 				CPU_COPY(&cpuset_domain[uap->id], mask);

Modified: head/sys/vm/vm_domain.c
==============================================================================
--- head/sys/vm/vm_domain.c	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/vm/vm_domain.c	Sat Apr  9 13:58:04 2016	(r297748)
@@ -39,7 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 #include <sys/proc.h>
 #endif
 #include <sys/queue.h>
@@ -64,7 +64,7 @@ __FBSDID("$FreeBSD$");
 static __inline int
 vm_domain_rr_selectdomain(int skip_domain)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	struct thread *td;
 
 	td = curthread;
@@ -188,8 +188,13 @@ vm_domain_policy_validate(const struct v
 		return (-1);
 	case VM_POLICY_FIXED_DOMAIN:
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
+#ifdef VM_NUMA_ALLOC
 		if (vp->p.domain >= 0 && vp->p.domain < vm_ndomains)
 			return (0);
+#else
+		if (vp->p.domain == 0)
+			return (0);
+#endif
 		return (-1);
 	default:
 		return (-1);
@@ -221,6 +226,7 @@ vm_domain_iterator_set(struct vm_domain_
     vm_domain_policy_type_t vt, int domain)
 {
 
+#ifdef VM_NUMA_ALLOC
 	switch (vt) {
 	case VM_POLICY_FIXED_DOMAIN:
 		vi->policy = VM_POLICY_FIXED_DOMAIN;
@@ -249,6 +255,10 @@ vm_domain_iterator_set(struct vm_domain_
 		vi->n = vm_ndomains;
 		break;
 	}
+#else
+	vi->domain = 0;
+	vi->n = 1;
+#endif
 	return (0);
 }
 
@@ -259,6 +269,8 @@ static inline void
 _vm_domain_iterator_set_policy(struct vm_domain_iterator *vi,
     const struct vm_domain_policy *vt)
 {
+
+#ifdef VM_NUMA_ALLOC
 	/*
 	 * Initialise the iterator.
 	 *
@@ -300,6 +312,10 @@ _vm_domain_iterator_set_policy(struct vm
 		vi->n = vm_ndomains;
 		break;
 	}
+#else
+	vi->domain = 0;
+	vi->n = 1;
+#endif
 }
 
 void
@@ -334,6 +350,7 @@ vm_domain_iterator_run(struct vm_domain_
 	if (vi->n <= 0)
 		return (-1);
 
+#ifdef VM_NUMA_ALLOC
 	switch (vi->policy) {
 	case VM_POLICY_FIXED_DOMAIN:
 	case VM_POLICY_FIRST_TOUCH:
@@ -358,6 +375,10 @@ vm_domain_iterator_run(struct vm_domain_
 		vi->n--;
 		break;
 	}
+#else
+	*domain = 0;
+	vi->n--;
+#endif
 
 	return (0);
 }

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/vm/vm_pageout.c	Sat Apr  9 13:58:04 2016	(r297748)
@@ -1656,12 +1656,12 @@ static void
 vm_pageout(void)
 {
 	int error;
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	int i;
 #endif
 
 	swap_pager_swap_init();
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	for (i = 1; i < vm_ndomains; i++) {
 		error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
 		    curproc, NULL, 0, 0, "dom%d", i);

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/vm/vm_phys.c	Sat Apr  9 13:58:04 2016	(r297748)
@@ -48,9 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
-#if MAXMEMDOM > 1
 #include <sys/proc.h>
-#endif
 #include <sys/queue.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
@@ -73,8 +71,10 @@ __FBSDID("$FreeBSD$");
 _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
     "Too many physsegs.");
 
+#ifdef VM_NUMA_ALLOC
 struct mem_affinity *mem_affinity;
 int *mem_locality;
+#endif
 
 int vm_ndomains = 1;
 
@@ -144,7 +144,7 @@ static int sysctl_vm_phys_segs(SYSCTL_HA
 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
@@ -159,7 +159,7 @@ SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLF
 static struct mtx vm_default_policy_mtx;
 MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
     MTX_DEF);
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 static struct vm_domain_policy vm_default_policy =
     VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
 #else
@@ -277,7 +277,7 @@ vm_phys_fictitious_cmp(struct vm_phys_fi
 static __inline int
 vm_rr_selectdomain(void)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	struct thread *td;
 
 	td = curthread;
@@ -303,13 +303,13 @@ vm_rr_selectdomain(void)
 static void
 vm_policy_iterator_init(struct vm_domain_iterator *vi)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	struct vm_domain_policy lcl;
 #endif
 
 	vm_domain_iterator_init(vi);
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	/* Copy out the thread policy */
 	vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
 	if (lcl.p.policy != VM_POLICY_NONE) {
@@ -433,7 +433,7 @@ int
 vm_phys_mem_affinity(int f, int t)
 {
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	if (mem_locality == NULL)
 		return (-1);
 	if (f >= vm_ndomains || t >= vm_ndomains)
@@ -444,7 +444,7 @@ vm_phys_mem_affinity(int f, int t)
 #endif
 }
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 /*
  * Outputs the VM locality table.
  */
@@ -520,6 +520,7 @@ _vm_phys_create_seg(vm_paddr_t start, vm
 static void
 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
 {
+#ifdef VM_NUMA_ALLOC
 	int i;
 
 	if (mem_affinity == NULL) {
@@ -544,6 +545,9 @@ vm_phys_create_seg(vm_paddr_t start, vm_
 		    mem_affinity[i].domain);
 		start = mem_affinity[i].end;
 	}
+#else
+	_vm_phys_create_seg(start, end, 0);
+#endif
 }
 
 /*

Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/vm/vm_phys.h	Sat Apr  9 13:58:04 2016	(r297748)
@@ -99,7 +99,7 @@ int vm_phys_mem_affinity(int f, int t);
 static inline struct vm_domain *
 vm_phys_domain(vm_page_t m)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	int domn, segind;
 
 	/* XXXKIB try to assert that the page is managed */

Modified: head/sys/x86/acpica/srat.c
==============================================================================
--- head/sys/x86/acpica/srat.c	Sat Apr  9 13:32:42 2016	(r297747)
+++ head/sys/x86/acpica/srat.c	Sat Apr  9 13:58:04 2016	(r297748)
@@ -28,6 +28,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_vm.h"
+
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
@@ -62,7 +64,8 @@ int num_mem;
 static ACPI_TABLE_SRAT *srat;
 static vm_paddr_t srat_physaddr;
 
-static int vm_domains[VM_PHYSSEG_MAX];
+static int domain_pxm[MAXMEMDOM];
+static int ndomain;
 
 static ACPI_TABLE_SLIT *slit;
 static vm_paddr_t slit_physaddr;
@@ -145,8 +148,10 @@ parse_slit(void)
 	acpi_unmap_table(slit);
 	slit = NULL;
 
+#ifdef VM_NUMA_ALLOC
 	/* Tell the VM about it! */
 	mem_locality = vm_locality_table;
+#endif
 	return (0);
 }
 
@@ -340,48 +345,46 @@ renumber_domains(void)
 	int i, j, slot;
 
 	/* Enumerate all the domains. */
-	vm_ndomains = 0;
+	ndomain = 0;
 	for (i = 0; i < num_mem; i++) {
 		/* See if this domain is already known. */
-		for (j = 0; j < vm_ndomains; j++) {
-			if (vm_domains[j] >= mem_info[i].domain)
+		for (j = 0; j < ndomain; j++) {
+			if (domain_pxm[j] >= mem_info[i].domain)
 				break;
 		}
-		if (j < vm_ndomains && vm_domains[j] == mem_info[i].domain)
+		if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
 			continue;
 
 		/* Insert the new domain at slot 'j'. */
 		slot = j;
-		for (j = vm_ndomains; j > slot; j--)
-			vm_domains[j] = vm_domains[j - 1];
-		vm_domains[slot] = mem_info[i].domain;
-		vm_ndomains++;
-		if (vm_ndomains > MAXMEMDOM) {
-			vm_ndomains = 1;
+		for (j = ndomain; j > slot; j--)
+			domain_pxm[j] = domain_pxm[j - 1];
+		domain_pxm[slot] = mem_info[i].domain;
+		ndomain++;
+		if (ndomain > MAXMEMDOM) {
+			ndomain = 1;
 			printf("SRAT: Too many memory domains\n");
 			return (EFBIG);
 		}
 	}
 
-	/* Renumber each domain to its index in the sorted 'domains' list. */
-	for (i = 0; i < vm_ndomains; i++) {
+	/* Renumber each domain to its index in the sorted 'domain_pxm' list. */
+	for (i = 0; i < ndomain; i++) {
 		/*
 		 * If the domain is already the right value, no need
 		 * to renumber.
 		 */
-		if (vm_domains[i] == i)
+		if (domain_pxm[i] == i)
 			continue;
 
 		/* Walk the cpu[] and mem_info[] arrays to renumber. */
 		for (j = 0; j < num_mem; j++)
-			if (mem_info[j].domain == vm_domains[i])
+			if (mem_info[j].domain == domain_pxm[i])
 				mem_info[j].domain = i;
 		for (j = 0; j <= MAX_APIC_ID; j++)
-			if (cpus[j].enabled && cpus[j].domain == vm_domains[i])
+			if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
 				cpus[j].domain = i;
 	}
-	KASSERT(vm_ndomains > 0,
-	    ("renumber_domains: invalid final vm_ndomains setup"));
 
 	return (0);
 }
@@ -416,8 +419,11 @@ parse_srat(void)
 		return (-1);
 	}
 
+#ifdef VM_NUMA_ALLOC
 	/* Point vm_phys at our memory affinity table. */
+	vm_ndomains = ndomain;
 	mem_affinity = mem_info;
+#endif
 
 	return (0);
 }
@@ -495,12 +501,21 @@ acpi_map_pxm_to_vm_domainid(int pxm)
 {
 	int i;
 
-	for (i = 0; i < vm_ndomains; i++) {
-		if (vm_domains[i] == pxm)
+	for (i = 0; i < ndomain; i++) {
+		if (domain_pxm[i] == pxm)
 			return (i);
 	}
 
 	return (-1);
 }
 
+#else /* MAXMEMDOM == 1 */
+
+int
+acpi_map_pxm_to_vm_domainid(int pxm)
+{
+
+	return (-1);
+}
+
 #endif /* MAXMEMDOM > 1 */


More information about the svn-src-head mailing list