svn commit: r262199 - in projects/numa/sys: amd64/amd64 i386/i386 ia64/ia64 kern mips/mips powerpc/aim sparc64/sparc64 sys vm
From: Jeff Roberson <jeff at FreeBSD.org>
Date: Wed Feb 19 00:35:31 UTC 2014
Author: jeff
Date: Wed Feb 19 00:35:27 2014
New Revision: 262199
URL: http://svnweb.freebsd.org/changeset/base/262199
Log:
- Push NUMA domain selection into the object layer and out of the
  physical page layer.
- Make UMA zones NUMA domain aware.
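In outline: every UMA backend allocator and cache-zone import function
gains an int domain argument, and the per-zone bucket lists and per-keg
slab lists are split per domain. A minimal sketch of the reworked hook
signatures, taken from the uma.h hunk below (UMA_ANYDOMAIN, used in the
uma_core.c hunks as the "no preference" sentinel, is presumably defined
in uma_int.h, which this truncated diff does not show):

    /* Backend page supplier: now told which domain to prefer. */
    typedef void *(*uma_alloc)(uma_zone_t zone, int size, int domain,
        uint8_t *pflag, int wait);

    /* Cache-zone import: likewise takes a preferred domain. */
    typedef int (*uma_import)(void *arg, void **store, int count,
        int domain, int flags);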
Modified:
projects/numa/sys/amd64/amd64/uma_machdep.c
projects/numa/sys/i386/i386/pmap.c
projects/numa/sys/ia64/ia64/uma_machdep.c
projects/numa/sys/kern/kern_mbuf.c
projects/numa/sys/kern/subr_busdma_bufalloc.c
projects/numa/sys/kern/subr_vmem.c
projects/numa/sys/mips/mips/pmap.c
projects/numa/sys/mips/mips/uma_machdep.c
projects/numa/sys/powerpc/aim/mmu_oea64.c
projects/numa/sys/powerpc/aim/slb.c
projects/numa/sys/powerpc/aim/uma_machdep.c
projects/numa/sys/sparc64/sparc64/vm_machdep.c
projects/numa/sys/sys/busdma_bufalloc.h
projects/numa/sys/sys/proc.h
projects/numa/sys/vm/uma.h
projects/numa/sys/vm/uma_core.c
projects/numa/sys/vm/uma_int.h
projects/numa/sys/vm/vm_object.c
projects/numa/sys/vm/vm_object.h
projects/numa/sys/vm/vm_page.c
projects/numa/sys/vm/vm_page.h
projects/numa/sys/vm/vm_phys.c
projects/numa/sys/vm/vm_phys.h
projects/numa/sys/vm/vm_reserv.c
projects/numa/sys/vm/vm_reserv.h
Modified: projects/numa/sys/amd64/amd64/uma_machdep.c
==============================================================================
--- projects/numa/sys/amd64/amd64/uma_machdep.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/amd64/amd64/uma_machdep.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
void *
-uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
vm_page_t m;
vm_paddr_t pa;
@@ -51,7 +52,7 @@ uma_small_alloc(uma_zone_t zone, int byt
*flags = UMA_SLAB_PRIV;
pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
for (;;) {
- m = vm_page_alloc(NULL, 0, pflags);
+ m = vm_page_alloc_domain(NULL, 0, domain, pflags);
if (m == NULL) {
if (wait & M_NOWAIT)
return (NULL);
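The hunk shows only the new signature and the switch to
vm_page_alloc_domain(). For context, a sketch of the whole routine after
the change, assuming the remainder of the stock amd64 uma_small_alloc()
is untouched:

    void *
    uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
        int wait)
    {
        vm_page_t m;
        vm_paddr_t pa;
        void *va;
        int pflags;

        *flags = UMA_SLAB_PRIV;
        pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
        for (;;) {
            /* Prefer a page from the requested domain. */
            m = vm_page_alloc_domain(NULL, 0, domain, pflags);
            if (m != NULL)
                break;
            if (wait & M_NOWAIT)
                return (NULL);
            VM_WAIT;
        }
        pa = m->phys_addr;
        if ((wait & M_NODUMP) == 0)
            dump_add_page(pa);
        /* Serve the page through the direct map; no KVA allocation. */
        va = (void *)PHYS_TO_DMAP(pa);
        if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
            pagezero(va);
        return (va);
    }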
Modified: projects/numa/sys/i386/i386/pmap.c
==============================================================================
--- projects/numa/sys/i386/i386/pmap.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/i386/i386/pmap.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -339,7 +339,8 @@ static pt_entry_t *pmap_pte_quick(pmap_t
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *);
#ifdef PAE
-static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, int domain,
+ u_int8_t *flags, int wait);
#endif
static void pmap_set_pg(void);
@@ -648,7 +649,8 @@ pmap_page_init(vm_page_t m)
#ifdef PAE
static void *
-pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+pmap_pdpt_allocf(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
/* Inform UMA that this allocator uses kernel_map/object. */
Modified: projects/numa/sys/ia64/ia64/uma_machdep.c
==============================================================================
--- projects/numa/sys/ia64/ia64/uma_machdep.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/ia64/ia64/uma_machdep.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -40,7 +40,8 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
void *
-uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
void *va;
vm_page_t m;
Modified: projects/numa/sys/kern/kern_mbuf.c
==============================================================================
--- projects/numa/sys/kern/kern_mbuf.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/kern/kern_mbuf.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -284,7 +284,7 @@ static int mb_zinit_pack(void *, int, in
static void mb_zfini_pack(void *, int);
static void mb_reclaim(void *);
-static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
+static void *mbuf_jumbo_alloc(uma_zone_t, int, int, uint8_t *, int);
/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
@@ -389,7 +389,8 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRS
* pages.
*/
static void *
-mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
+mbuf_jumbo_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *flags,
+ int wait)
{
/* Inform UMA that this allocator uses kernel_map/object. */
Modified: projects/numa/sys/kern/subr_busdma_bufalloc.c
==============================================================================
--- projects/numa/sys/kern/subr_busdma_bufalloc.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/kern/subr_busdma_bufalloc.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -147,8 +147,8 @@ busdma_bufalloc_findzone(busdma_bufalloc
}
void *
-busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, u_int8_t *pflag,
- int wait)
+busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, int domain,
+ u_int8_t *pflag, int wait)
{
#ifdef VM_MEMATTR_UNCACHEABLE
Modified: projects/numa/sys/kern/subr_vmem.c
==============================================================================
--- projects/numa/sys/kern/subr_vmem.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/kern/subr_vmem.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -495,7 +495,7 @@ bt_insfree(vmem_t *vm, bt_t *bt)
* Import from the arena into the quantum cache in UMA.
*/
static int
-qc_import(void *arg, void **store, int cnt, int flags)
+qc_import(void *arg, void **store, int cnt, int domain, int flags)
{
qcache_t *qc;
vmem_addr_t addr;
@@ -608,7 +608,7 @@ static struct mtx_padalign vmem_bt_lock;
* we are really out of KVA.
*/
static void *
-vmem_bt_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
+vmem_bt_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *pflag, int wait)
{
vmem_addr_t addr;
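The vmem changes are purely mechanical: qc_import() and vmem_bt_alloc()
grow the extra parameter to satisfy the new uma_import and uma_alloc
typedefs. Address space has no NUMA affinity of its own, so the domain
is presumably accepted and ignored; a sketch of qc_import() under that
assumption, with the body otherwise following the stock routine:

    static int
    qc_import(void *arg, void **store, int cnt, int domain, int flags)
    {
        qcache_t *qc = arg;
        vmem_addr_t addr;
        int i;

        for (i = 0; i < cnt; i++) {
            if (vmem_xalloc(qc->qc_vmem, qc->qc_size, 0, 0, 0,
                VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, &addr) != 0)
                break;
            /* 'domain' is unused: KVA is not domain-qualified. */
            store[i] = (void *)addr;
        }
        return (i);
    }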
Modified: projects/numa/sys/mips/mips/pmap.c
==============================================================================
--- projects/numa/sys/mips/mips/pmap.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/mips/mips/pmap.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -1047,7 +1047,7 @@ pmap_alloc_direct_page(unsigned int inde
{
vm_page_t m;
- m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
+ m = vm_page_alloc_freelist(0, VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
VM_ALLOC_ZERO);
if (m == NULL)
return (NULL);
@@ -1581,7 +1581,7 @@ retry:
}
}
/* No free items, allocate another chunk */
- m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL |
+ m = vm_page_alloc_freelist(0, VM_FREELIST_DIRECT, VM_ALLOC_NORMAL |
VM_ALLOC_WIRED);
if (m == NULL) {
if (try) {
Modified: projects/numa/sys/mips/mips/uma_machdep.c
==============================================================================
--- projects/numa/sys/mips/mips/uma_machdep.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/mips/mips/uma_machdep.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
void *
-uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
vm_paddr_t pa;
vm_page_t m;
@@ -52,7 +53,7 @@ uma_small_alloc(uma_zone_t zone, int byt
pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
for (;;) {
- m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags);
+ m = vm_page_alloc_freelist(domain, VM_FREELIST_DIRECT, pflags);
if (m == NULL) {
if (wait & M_NOWAIT)
return (NULL);
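vm_page_alloc_freelist() now takes the domain as its first argument: the
mips pmap callers above hard-code domain 0, while uma_small_alloc()
forwards whatever domain UMA selected. A small usage sketch
(alloc_direct_page is a hypothetical helper, not part of the commit):

    /* Allocate one wired page from 'domain', pre-zeroed if available. */
    static vm_page_t
    alloc_direct_page(int domain)
    {
        return (vm_page_alloc_freelist(domain, VM_FREELIST_DIRECT,
            VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO));
    }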
Modified: projects/numa/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- projects/numa/sys/powerpc/aim/mmu_oea64.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/powerpc/aim/mmu_oea64.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -1446,7 +1446,8 @@ retry:
static mmu_t installed_mmu;
static void *
-moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+moea64_uma_page_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
/*
* This entire routine is a horrible hack to avoid bothering kmem
Modified: projects/numa/sys/powerpc/aim/slb.c
==============================================================================
--- projects/numa/sys/powerpc/aim/slb.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/powerpc/aim/slb.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -473,7 +473,8 @@ slb_insert_user(pmap_t pm, struct slb *s
}
static void *
-slb_uma_real_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+slb_uma_real_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
static vm_offset_t realmax = 0;
void *va;
Modified: projects/numa/sys/powerpc/aim/uma_machdep.c
==============================================================================
--- projects/numa/sys/powerpc/aim/uma_machdep.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/powerpc/aim/uma_machdep.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -50,7 +50,8 @@ SYSCTL_INT(_hw, OID_AUTO, uma_mdpages, C
"UMA MD pages in use");
void *
-uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
void *va;
vm_page_t m;
Modified: projects/numa/sys/sparc64/sparc64/vm_machdep.c
==============================================================================
--- projects/numa/sys/sparc64/sparc64/vm_machdep.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/sparc64/sparc64/vm_machdep.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -502,7 +502,8 @@ swi_vm(void *v)
}
void *
-uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags,
+ int wait)
{
vm_paddr_t pa;
vm_page_t m;
Modified: projects/numa/sys/sys/busdma_bufalloc.h
==============================================================================
--- projects/numa/sys/sys/busdma_bufalloc.h Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/sys/busdma_bufalloc.h Wed Feb 19 00:35:27 2014 (r262199)
@@ -111,7 +111,7 @@ struct busdma_bufzone * busdma_bufalloc_
* you can probably use these when you need uncacheable buffers.
*/
void * busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size,
- u_int8_t *pflag, int wait);
+ int domain, u_int8_t *pflag, int wait);
void busdma_bufalloc_free_uncacheable(void *item, int size, u_int8_t pflag);
#endif /* _MACHINE_BUSDMA_BUFALLOC_H_ */
Modified: projects/numa/sys/sys/proc.h
==============================================================================
--- projects/numa/sys/sys/proc.h Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/sys/proc.h Wed Feb 19 00:35:27 2014 (r262199)
@@ -274,7 +274,6 @@ struct thread {
pid_t td_dbg_forked; /* (c) Child pid for debugger. */
u_int td_vp_reserv; /* (k) Count of reserved vnodes. */
int td_no_sleeping; /* (k) Sleeping disabled count. */
- int td_dom_rr_idx; /* (k) RR Numa domain selection. */
#define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */
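The per-thread round-robin domain index is retired; as the uma_core.c
hunks below show, round-robin selection now lives in the keg as
uk_cursor. A two-line sketch of the replacement (see keg_fetch_slab()
later in this diff):

    /* Per-keg round robin, replacing the removed td_dom_rr_idx. */
    keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
    domain = keg->uk_cursor;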
Modified: projects/numa/sys/vm/uma.h
==============================================================================
--- projects/numa/sys/vm/uma.h Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/vm/uma.h Wed Feb 19 00:35:27 2014 (r262199)
@@ -126,7 +126,8 @@ typedef void (*uma_fini)(void *mem, int
/*
* Import new memory into a cache zone.
*/
-typedef int (*uma_import)(void *arg, void **store, int count, int flags);
+typedef int (*uma_import)(void *arg, void **store, int count, int domain,
+ int flags);
/*
* Free memory from a cache zone.
@@ -279,6 +280,11 @@ uma_zone_t uma_zcache_create(char *name,
* Allocates mp_ncpus slabs sized to
* sizeof(struct pcpu).
*/
+#define UMA_ZONE_NUMA 0x10000 /*
+ * Zone is NUMA aware. Implements
+ * a best effort first-touch
+ * allocation policy.
+ */
/*
* These flags are shared between the keg and zone. In zones wishing to add
@@ -365,16 +371,12 @@ uma_zfree(uma_zone_t zone, void *item)
}
/*
- * XXX The rest of the prototypes in this header are h0h0 magic for the VM.
- * If you think you need to use it for a normal zone you're probably incorrect.
- */
-
-/*
* Backend page supplier routines
*
* Arguments:
* zone The zone that is requesting pages.
* size The number of bytes being requested.
+ * domain The NUMA domain we prefer for this allocation.
* pflag Flags for these memory pages, see below.
* wait Indicates our willingness to block.
*
@@ -382,7 +384,8 @@ uma_zfree(uma_zone_t zone, void *item)
* A pointer to the allocated memory or NULL on failure.
*/
-typedef void *(*uma_alloc)(uma_zone_t zone, int size, uint8_t *pflag, int wait);
+typedef void *(*uma_alloc)(uma_zone_t zone, int size, int domain,
+ uint8_t *pflag, int wait);
/*
* Backend page free routines
@@ -397,8 +400,6 @@ typedef void *(*uma_alloc)(uma_zone_t zo
*/
typedef void (*uma_free)(void *item, int size, uint8_t pflag);
-
-
/*
* Sets up the uma allocator. (Called by vm_mem_init)
*
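Taken together, the uma.h changes let a consumer opt a zone into the
best-effort first-touch policy. A hedged sketch of such a zone; the
"frob" name and item type are invented for illustration, and only
UMA_ZONE_NUMA itself comes from this commit:

    #include <vm/uma.h>

    struct frob { int f_dummy; };        /* hypothetical item type */
    static uma_zone_t frobzone;

    static void
    frob_zinit(void)
    {
        /*
         * UMA_ZONE_NUMA requests first-touch behavior; everything
         * else is a stock uma_zcreate() call.
         */
        frobzone = uma_zcreate("frob", sizeof(struct frob),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NUMA);
    }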
Modified: projects/numa/sys/vm/uma_core.c
==============================================================================
--- projects/numa/sys/vm/uma_core.c Tue Feb 18 23:22:54 2014 (r262198)
+++ projects/numa/sys/vm/uma_core.c Wed Feb 19 00:35:27 2014 (r262199)
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -226,11 +227,11 @@ enum zfreeskip { SKIP_NONE = 0, SKIP_DTO
/* Prototypes.. */
-static void *noobj_alloc(uma_zone_t, int, uint8_t *, int);
-static void *page_alloc(uma_zone_t, int, uint8_t *, int);
-static void *startup_alloc(uma_zone_t, int, uint8_t *, int);
+static void *noobj_alloc(uma_zone_t, int, int, uint8_t *, int);
+static void *page_alloc(uma_zone_t, int, int, uint8_t *, int);
+static void *startup_alloc(uma_zone_t, int, int, uint8_t *, int);
static void page_free(void *, int, uint8_t);
-static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
+static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void bucket_cache_drain(uma_zone_t zone);
@@ -248,23 +249,23 @@ static int hash_expand(struct uma_hash *
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
-static void *zone_alloc_item(uma_zone_t, void *, int);
+static void *zone_alloc_item(uma_zone_t, void *, int, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
static void bucket_zone_drain(void);
-static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
-static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
-static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
+static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
+static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
+static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int);
static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
uma_fini fini, int align, uint32_t flags);
-static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
-static void zone_release(uma_zone_t zone, void **bucket, int cnt);
-static void uma_zero_item(void *item, uma_zone_t zone);
+static int zone_import(uma_zone_t, void **, int, int, int);
+static void zone_release(uma_zone_t, void **, int);
+static void uma_zero_item(void *, uma_zone_t);
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
@@ -311,7 +312,7 @@ bucket_init(void)
size += sizeof(void *) * ubz->ubz_entries;
ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
- UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
+ UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
}
}
@@ -539,7 +540,7 @@ hash_alloc(struct uma_hash *hash)
} else {
alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
- M_WAITOK);
+ UMA_ANYDOMAIN, M_WAITOK);
hash->uh_hashsize = UMA_HASH_SIZE_INIT;
}
if (hash->uh_slab_hash) {
@@ -705,17 +706,23 @@ cache_drain_safe_cpu(uma_zone_t zone)
{
uma_cache_t cache;
uma_bucket_t b1, b2;
+ int domain;
if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
return;
b1 = b2 = NULL;
+
ZONE_LOCK(zone);
critical_enter();
+ if (zone->uz_flags & UMA_ZONE_NUMA)
+ domain = PCPU_GET(domain);
+ else
+ domain = 0;
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket) {
if (cache->uc_allocbucket->ub_cnt != 0)
- LIST_INSERT_HEAD(&zone->uz_buckets,
+ LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
cache->uc_allocbucket, ub_link);
else
b1 = cache->uc_allocbucket;
@@ -723,7 +730,7 @@ cache_drain_safe_cpu(uma_zone_t zone)
}
if (cache->uc_freebucket) {
if (cache->uc_freebucket->ub_cnt != 0)
- LIST_INSERT_HEAD(&zone->uz_buckets,
+ LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
cache->uc_freebucket, ub_link);
else
b2 = cache->uc_freebucket;
@@ -778,18 +785,22 @@ cache_drain_safe(uma_zone_t zone)
static void
bucket_cache_drain(uma_zone_t zone)
{
+ uma_zone_domain_t zdom;
uma_bucket_t bucket;
+ int i;
/*
- * Drain the bucket queues and free the buckets, we just keep two per
- * cpu (alloc/free).
+ * Drain the bucket queues and free the buckets.
*/
- while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
- LIST_REMOVE(bucket, ub_link);
- ZONE_UNLOCK(zone);
- bucket_drain(zone, bucket);
- bucket_free(zone, bucket, NULL);
- ZONE_LOCK(zone);
+ for (i = 0; i < vm_ndomains; i++) {
+ zdom = &zone->uz_domain[i];
+ while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+ LIST_REMOVE(bucket, ub_link);
+ ZONE_UNLOCK(zone);
+ bucket_drain(zone, bucket);
+ bucket_free(zone, bucket, NULL);
+ ZONE_LOCK(zone);
+ }
}
/*
@@ -834,8 +845,10 @@ static void
keg_drain(uma_keg_t keg)
{
struct slabhead freeslabs = { 0 };
+ uma_domain_t dom;
uma_slab_t slab;
uma_slab_t n;
+ int i;
/*
* We don't want to take pages from statically allocated kegs at this
@@ -851,26 +864,30 @@ keg_drain(uma_keg_t keg)
if (keg->uk_free == 0)
goto finished;
- slab = LIST_FIRST(&keg->uk_free_slab);
- while (slab) {
- n = LIST_NEXT(slab, us_link);
-
- /* We have no where to free these to */
- if (slab->us_flags & UMA_SLAB_BOOT) {
- slab = n;
- continue;
- }
-
- LIST_REMOVE(slab, us_link);
- keg->uk_pages -= keg->uk_ppera;
- keg->uk_free -= keg->uk_ipers;
+ for (i = 0; i < vm_ndomains; i++) {
+ dom = &keg->uk_domain[i];
+ slab = LIST_FIRST(&dom->ud_free_slab);
+ while (slab) {
+ n = LIST_NEXT(slab, us_link);
+
+ /* We have nowhere to free these to */
+ if (slab->us_flags & UMA_SLAB_BOOT) {
+ slab = n;
+ continue;
+ }
- if (keg->uk_flags & UMA_ZONE_HASH)
- UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
+ LIST_REMOVE(slab, us_link);
+ keg->uk_pages -= keg->uk_ppera;
+ keg->uk_free -= keg->uk_ipers;
+
+ if (keg->uk_flags & UMA_ZONE_HASH)
+ UMA_HASH_REMOVE(&keg->uk_hash, slab,
+ slab->us_data);
- SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
+ SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
- slab = n;
+ slab = n;
+ }
}
finished:
KEG_UNLOCK(keg);
@@ -933,7 +950,7 @@ zone_drain(uma_zone_t zone)
* caller specified M_NOWAIT.
*/
static uma_slab_t
-keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
+keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
{
uma_slabrefcnt_t slabref;
uma_alloc allocf;
@@ -942,6 +959,8 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
uint8_t flags;
int i;
+ KASSERT(domain >= 0 && domain < vm_ndomains,
+ ("keg_alloc_slab: domain %d out of range", domain));
mtx_assert(&keg->uk_lock, MA_OWNED);
slab = NULL;
mem = NULL;
@@ -953,7 +972,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
KEG_UNLOCK(keg);
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
- slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
+ slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait);
if (slab == NULL)
goto out;
}
@@ -974,7 +993,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
wait |= M_NODUMP;
/* zone is passed for legacy reasons. */
- mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
+ mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, domain, &flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
@@ -998,6 +1017,18 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
#ifdef INVARIANTS
BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
#endif
+ /*
+ * Set the domain based on the first page. This may be incorrect
+ * for multi-page allocations depending on the numa_policy specified.
+ */
+#if MAXMEMDOM > 1
+ if ((flags & UMA_SLAB_BOOT) == 0)
+ slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
+ pmap_kextract((vm_offset_t)mem)));
+ else
+#endif
+ slab->us_domain = 0;
+
if (keg->uk_flags & UMA_ZONE_REFCNT) {
slabref = (uma_slabrefcnt_t)slab;
for (i = 0; i < keg->uk_ipers; i++)
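The new us_domain field is derived from the slab's first backing page:
the KVA is translated back to a physical page and classified with
vm_phys_domain(). The same idiom, pulled out as a standalone sketch:

    /*
     * Sketch: classify a kernel virtual address by NUMA domain, as
     * keg_alloc_slab() now does for non-boot slabs.
     */
    static int
    kva_to_domain(vm_offset_t va)
    {
    #if MAXMEMDOM > 1
        return (vm_phys_domain(PHYS_TO_VM_PAGE(pmap_kextract(va))));
    #else
        return (0);
    #endif
    }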
@@ -1035,7 +1066,7 @@ out:
* the VM is ready.
*/
static void *
-startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
+startup_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *pflag, int wait)
{
uma_keg_t keg;
uma_slab_t tmps;
@@ -1080,7 +1111,7 @@ startup_alloc(uma_zone_t zone, int bytes
#else
keg->uk_allocf = page_alloc;
#endif
- return keg->uk_allocf(zone, bytes, pflag, wait);
+ return keg->uk_allocf(zone, bytes, domain, pflag, wait);
}
/*
@@ -1095,7 +1126,7 @@ startup_alloc(uma_zone_t zone, int bytes
* NULL if M_NOWAIT is set.
*/
static void *
-page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
+page_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *pflag, int wait)
{
void *p; /* Returned page */
@@ -1117,7 +1148,7 @@ page_alloc(uma_zone_t zone, int bytes, u
* NULL if M_NOWAIT is set.
*/
static void *
-noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
+noobj_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *flags, int wait)
{
TAILQ_HEAD(, vm_page) alloctail;
u_long npages;
@@ -1130,7 +1161,7 @@ noobj_alloc(uma_zone_t zone, int bytes,
npages = howmany(bytes, PAGE_SIZE);
while (npages > 0) {
- p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
+ p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
if (p != NULL) {
/*
@@ -1410,6 +1441,7 @@ keg_ctor(void *mem, int size, void *udat
keg->uk_init = arg->uminit;
keg->uk_fini = arg->fini;
keg->uk_align = arg->align;
+ keg->uk_cursor = 0;
keg->uk_free = 0;
keg->uk_reserve = 0;
keg->uk_pages = 0;
@@ -1910,7 +1942,7 @@ uma_kcreate(uma_zone_t zone, size_t size
args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
args.flags = flags;
args.zone = zone;
- return (zone_alloc_item(kegs, &args, M_WAITOK));
+ return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
}
/* See uma.h */
@@ -1942,7 +1974,7 @@ uma_zcreate(const char *name, size_t siz
args.flags = flags;
args.keg = NULL;
- return (zone_alloc_item(zones, &args, M_WAITOK));
+ return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
}
/* See uma.h */
@@ -1966,7 +1998,7 @@ uma_zsecond_create(char *name, uma_ctor
args.keg = keg;
/* XXX Attaches only one keg of potentially many. */
- return (zone_alloc_item(zones, &args, M_WAITOK));
+ return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
}
/* See uma.h */
@@ -1990,7 +2022,7 @@ uma_zcache_create(char *name, int size,
args.align = 0;
args.flags = flags;
- return (zone_alloc_item(zones, &args, M_WAITOK));
+ return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
}
static void
@@ -2091,10 +2123,12 @@ uma_zdestroy(uma_zone_t zone)
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
- void *item;
- uma_cache_t cache;
+ uma_zone_domain_t zdom;
uma_bucket_t bucket;
+ uma_cache_t cache;
+ void *item;
int lockfail;
+ int domain;
int cpu;
/* This is the fast path allocation */
@@ -2196,8 +2230,10 @@ zalloc_start:
bucket_free(zone, bucket, udata);
/* Short-circuit for zones without buckets and low memory. */
- if (zone->uz_count == 0 || bucketdisable)
+ if (zone->uz_count == 0 || bucketdisable) {
+ domain = UMA_ANYDOMAIN;
goto zalloc_item;
+ }
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
@@ -2232,10 +2268,19 @@ zalloc_start:
goto zalloc_start;
}
+ /* Get the domain according to zone flags. */
+ if (zone->uz_flags & UMA_ZONE_NUMA) {
+ domain = PCPU_GET(domain);
+ zdom = &zone->uz_domain[domain];
+ } else {
+ domain = UMA_ANYDOMAIN;
+ zdom = &zone->uz_domain[0];
+ }
+
/*
* Check the zone's cache of buckets.
*/
- if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
+ if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
@@ -2260,7 +2305,7 @@ zalloc_start:
* works we'll restart the allocation from the begining and it
* will use the just filled bucket.
*/
- bucket = zone_alloc_bucket(zone, udata, flags);
+ bucket = zone_alloc_bucket(zone, udata, domain, flags);
if (bucket != NULL) {
ZONE_LOCK(zone);
critical_enter();
@@ -2271,10 +2316,11 @@ zalloc_start:
* initialized bucket to make this less likely or claim
* the memory directly.
*/
- if (cache->uc_allocbucket == NULL)
- cache->uc_allocbucket = bucket;
+ if (cache->uc_allocbucket != NULL ||
+ (domain != UMA_ANYDOMAIN && domain != PCPU_GET(domain)))
+ LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
else
- LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
+ cache->uc_allocbucket = bucket;
ZONE_UNLOCK(zone);
goto zalloc_start;
}
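With UMA_ZONE_NUMA set, the allocation path keys the zone's bucket cache
by the executing CPU's domain, which is what makes the policy "first
touch": memory is preferentially taken from, and cached for, the local
domain. A usage sketch, assuming the hypothetical frobzone from the
uma.h notes above and a caller pinned with sched_bind() so that
PCPU_GET(domain) stays stable across the call:

    void *p;
    int cpu = 0;        /* hypothetical target CPU */

    /* Pin to 'cpu' so the first-touch domain does not shift under us. */
    thread_lock(curthread);
    sched_bind(curthread, cpu);
    thread_unlock(curthread);

    /* Served, best effort, from the domain that 'cpu' belongs to. */
    p = uma_zalloc(frobzone, M_WAITOK);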
@@ -2287,16 +2333,44 @@ zalloc_start:
#endif
zalloc_item:
- item = zone_alloc_item(zone, udata, flags);
+ item = zone_alloc_item(zone, udata, domain, flags);
return (item);
}
+/*
+ * Find a slab with some space. Prefer slabs that are partially
+ * used over those that are totally full. This helps to reduce
+ * fragmentation.
+ */
+static uma_slab_t
+keg_first_slab(uma_keg_t keg, int domain)
+{
+ uma_domain_t dom;
+ uma_slab_t slab;
+
+ KASSERT(domain >= 0 && domain < vm_ndomains,
+ ("keg_first_slab: Domain %d out of range", domain));
+ dom = &keg->uk_domain[domain];
+ if (!LIST_EMPTY(&dom->ud_part_slab))
+ return LIST_FIRST(&dom->ud_part_slab);
+ if (LIST_EMPTY(&dom->ud_free_slab))
+ return (NULL);
+ slab = LIST_FIRST(&dom->ud_free_slab);
+ LIST_REMOVE(slab, us_link);
+ LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
+
+ return (slab);
+}
+
static uma_slab_t
-keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
+keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
{
+ uma_domain_t dom;
uma_slab_t slab;
int reserve;
+ int domain;
+ int start;
mtx_assert(&keg->uk_lock, MA_OWNED);
slab = NULL;
@@ -2304,21 +2378,14 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t
if ((flags & M_USE_RESERVE) == 0)
reserve = keg->uk_reserve;
- for (;;) {
- /*
- * Find a slab with some space. Prefer slabs that are partially
- * used over those that are totally full. This helps to reduce
- * fragmentation.
- */
- if (keg->uk_free > reserve) {
- if (!LIST_EMPTY(&keg->uk_part_slab)) {
- slab = LIST_FIRST(&keg->uk_part_slab);
- } else {
- slab = LIST_FIRST(&keg->uk_free_slab);
- LIST_REMOVE(slab, us_link);
- LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
- us_link);
- }
+ if (rdomain == UMA_ANYDOMAIN) {
+ keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+ domain = start = keg->uk_cursor;
+ } else
+ domain = start = rdomain;
+ do {
+ if (keg->uk_free > reserve &&
+ (slab = keg_first_slab(keg, domain)) != NULL) {
MPASS(slab->us_keg == keg);
return (slab);
}
@@ -2345,7 +2412,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t
msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
continue;
}
- slab = keg_alloc_slab(keg, zone, flags);
+ slab = keg_alloc_slab(keg, zone, domain, flags);
/*
* If we got a slab here it's safe to mark it partially used
* and return. We assume that the caller is going to remove
@@ -2353,7 +2420,8 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t
*/
if (slab) {
MPASS(slab->us_keg == keg);
- LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
+ dom = &keg->uk_domain[slab->us_domain];
+ LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
return (slab);
}
/*
@@ -2361,13 +2429,21 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t
* could have while we were unlocked. Check again before we
* fail.
*/
- flags |= M_NOVM;
- }
- return (slab);
+ if ((slab = keg_first_slab(keg, domain)) != NULL) {
+ MPASS(slab->us_keg == keg);
+ return (slab);
+ }
+ if (rdomain == UMA_ANYDOMAIN) {
+ keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+ domain = keg->uk_cursor;
+ }
+ } while (domain != start);
+
+ return (NULL);
}
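keg_fetch_slab() becomes a do/while over domains: a fixed request
searches only the requested domain, while UMA_ANYDOMAIN starts at the
keg's round-robin cursor and takes one full lap before giving up. The
traversal, reduced to a sketch (keg_try_domain is a hypothetical
stand-in for the reserve check plus keg_first_slab()):

    if (rdomain == UMA_ANYDOMAIN) {
        keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
        domain = start = keg->uk_cursor;
    } else
        domain = start = rdomain;
    do {
        if ((slab = keg_try_domain(keg, domain)) != NULL)
            return (slab);
        if (rdomain == UMA_ANYDOMAIN) {
            keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
            domain = keg->uk_cursor;
        }
    } while (domain != start);
    return (NULL);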
static uma_slab_t
-zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
+zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags)
{
uma_slab_t slab;
@@ -2377,7 +2453,7 @@ zone_fetch_slab(uma_zone_t zone, uma_keg
}
for (;;) {
- slab = keg_fetch_slab(keg, zone, flags);
+ slab = keg_fetch_slab(keg, zone, domain, flags);
if (slab)
return (slab);
if (flags & (M_NOWAIT | M_NOVM))
@@ -2394,7 +2470,7 @@ zone_fetch_slab(uma_zone_t zone, uma_keg
* The last pointer is used to seed the search. It is not required.
*/
static uma_slab_t
-zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
+zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int domain, int rflags)
{
uma_klink_t klink;
uma_slab_t slab;
@@ -2414,7 +2490,7 @@ zone_fetch_slab_multi(uma_zone_t zone, u
* the search.
*/
if (last != NULL) {
- slab = keg_fetch_slab(last, zone, flags);
+ slab = keg_fetch_slab(last, zone, domain, flags);
if (slab)
return (slab);
KEG_UNLOCK(last);
@@ -2435,7 +2511,7 @@ zone_fetch_slab_multi(uma_zone_t zone, u
keg = klink->kl_keg;
KEG_LOCK(keg);
if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
- slab = keg_fetch_slab(keg, zone, flags);
+ slab = keg_fetch_slab(keg, zone, domain, flags);
if (slab)
return (slab);
}
@@ -2470,6 +2546,7 @@ zone_fetch_slab_multi(uma_zone_t zone, u
static void *
slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
{
+ uma_domain_t dom;
void *item;
uint8_t freei;
@@ -2485,14 +2562,15 @@ slab_alloc_item(uma_keg_t keg, uma_slab_
/* Move this slab to the full list */
if (slab->us_freecount == 0) {
LIST_REMOVE(slab, us_link);
- LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
+ dom = &keg->uk_domain[slab->us_domain];
+ LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
}
return (item);
}
static int
-zone_import(uma_zone_t zone, void **bucket, int max, int flags)
+zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
{
uma_slab_t slab;
uma_keg_t keg;
@@ -2502,13 +2580,25 @@ zone_import(uma_zone_t zone, void **buck
keg = NULL;
/* Try to keep the buckets totally full */
for (i = 0; i < max; ) {
- if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
+ if ((slab = zone->uz_slab(zone, keg, domain, flags)) == NULL)
break;
keg = slab->us_keg;
while (slab->us_freecount && i < max) {
bucket[i++] = slab_alloc_item(keg, slab);
if (keg->uk_free <= keg->uk_reserve)
break;
+#if MAXMEMDOM > 1
+ /*
+ * If the zone is striped we pick a new slab for
+ * every allocation. Eliminating this conditional
+ * will instead pick a new domain for each bucket
+ * rather than stripe within each bucket. The
+ * current option produces more fragmentation but
+ * yields better distribution.
+ */
+ if (domain == UMA_ANYDOMAIN && vm_ndomains > 1)
+ break;
+#endif
}
/* Don't grab more than one slab at a time. */
flags &= ~M_WAITOK;
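The comment in the hunk above records a deliberate trade-off: breaking
out of the inner loop after every item stripes the items of one bucket
across domains, which fragments slabs more but distributes memory
better. The alternative it describes, filling each whole bucket from a
single domain, would simply drop the break; shown here as a hypothetical
variant, not the committed code:

    while (slab->us_freecount && i < max) {
        bucket[i++] = slab_alloc_item(keg, slab);
        if (keg->uk_free <= keg->uk_reserve)
            break;
        /* No per-item break: the domain now changes per bucket. */
    }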
@@ -2521,7 +2611,7 @@ zone_import(uma_zone_t zone, void **buck
}
static uma_bucket_t
-zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
+zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
{
uma_bucket_t bucket;
int max;
@@ -2533,7 +2623,7 @@ zone_alloc_bucket(uma_zone_t zone, void
max = MIN(bucket->ub_entries, zone->uz_count);
bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
- max, flags);
+ max, domain, flags);
/*
* Initialize the memory if necessary.
@@ -2583,7 +2673,7 @@ zone_alloc_bucket(uma_zone_t zone, void
*/
static void *
-zone_alloc_item(uma_zone_t zone, void *udata, int flags)
+zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
{
void *item;
@@ -2592,7 +2682,7 @@ zone_alloc_item(uma_zone_t zone, void *u
#ifdef UMA_DEBUG_ALLOC
printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
- if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
+ if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
goto fail;
atomic_add_long(&zone->uz_allocs, 1);
@@ -2633,7 +2723,9 @@ uma_zfree_arg(uma_zone_t zone, void *ite
{
uma_cache_t cache;
uma_bucket_t bucket;
+ uma_zone_domain_t zdom;
int lockfail;
+ int domain;
int cpu;
#ifdef UMA_DEBUG_ALLOC_1
@@ -2743,6 +2835,15 @@ zfree_start:
}
cache->uc_freebucket = NULL;
+ /* Get the domain according to zone flags. */
+ if (zone->uz_flags & UMA_ZONE_NUMA) {
+ domain = PCPU_GET(domain);
+ zdom = &zone->uz_domain[domain];
+ } else {
+ zdom = &zone->uz_domain[0];
+ domain = UMA_ANYDOMAIN;
+ }
+
/* Can we throw this on the zone full list? */
if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
@@ -2751,7 +2852,7 @@ zfree_start:
/* ub_cnt is pointing to the last free item */
KASSERT(bucket->ub_cnt != 0,
("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
- LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
+ LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
}
/* We are no longer associated with this CPU. */
@@ -2773,7 +2874,8 @@ zfree_start:
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
- if (cache->uc_freebucket == NULL) {
+ if (cache->uc_freebucket == NULL &&
+ (domain == UMA_ANYDOMAIN || domain == PCPU_GET(domain))) {
cache->uc_freebucket = bucket;
goto zfree_start;
}
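On free, a full bucket is cached on the current CPU only when the zone
has no domain affinity or the thread is still running in the domain it
sampled earlier; otherwise the bucket is queued on the matching
per-domain list so a local CPU can reuse it. Condensed into a sketch
(the fallback branch is an assumption about the surrounding code not
shown in this hunk):

    if (cache->uc_freebucket == NULL &&
        (domain == UMA_ANYDOMAIN || domain == PCPU_GET(domain)))
        cache->uc_freebucket = bucket;    /* keep it CPU-local */
    else
        LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);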
@@ -2798,18 +2900,20 @@ zfree_item:
static void
slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
{
+ uma_domain_t dom;
uint8_t freei;
mtx_assert(&keg->uk_lock, MA_OWNED);
MPASS(keg == slab->us_keg);
+ dom = &keg->uk_domain[slab->us_domain];
/* Do we need to remove from any lists? */
if (slab->us_freecount+1 == keg->uk_ipers) {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***