svn commit: r356081 - head/sys/vm
Jeff Roberson
jeff at FreeBSD.org
Wed Dec 25 20:57:25 UTC 2019
Author: jeff
Date: Wed Dec 25 20:57:24 2019
New Revision: 356081
URL: https://svnweb.freebsd.org/changeset/base/356081
Log:
Further reduce the cacheline footprint of fast allocations by duplicating
the zone size and flags fields in the per-cpu caches. This allows fast
allocations to proceed only touching the single per-cpu cacheline and
simplifies the common case when no ctor/dtor is specified.
Reviewed by: markj, rlibby
Differential Revision: https://reviews.freebsd.org/D22826
Modified:
head/sys/vm/uma_core.c
head/sys/vm/uma_int.h
Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c Wed Dec 25 20:50:53 2019 (r356080)
+++ head/sys/vm/uma_core.c Wed Dec 25 20:57:24 2019 (r356081)
@@ -281,7 +281,6 @@ static uma_keg_t uma_kcreate(uma_zone_t zone, size_t s
uma_fini fini, int align, uint32_t flags);
static int zone_import(void *, void **, int, int, int);
static void zone_release(void *, void **, int);
-static void uma_zero_item(void *, uma_zone_t);
static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int);
@@ -2183,6 +2182,17 @@ zone_count(uma_zone_t zone, void *arg)
zone->uz_namecnt + 1);
}
+static void
+zone_update_caches(uma_zone_t zone)
+{
+ int i;
+
+ for (i = 0; i <= mp_maxid; i++) {
+ cache_set_uz_size(&zone->uz_cpu[i], zone->uz_size);
+ cache_set_uz_flags(&zone->uz_cpu[i], zone->uz_flags);
+ }
+}
+
/*
* Zone header ctor. This initializes all fields, locks, etc.
*
@@ -2228,7 +2238,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
#ifdef INVARIANTS
if (arg->uminit == trash_init && arg->fini == trash_fini)
- zone->uz_flags |= UMA_ZFLAG_TRASH;
+ zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR;
#endif
/*
@@ -2327,6 +2337,9 @@ out:
else
zone->uz_bucket_size = bucket_select(zone->uz_size);
zone->uz_bucket_size_min = zone->uz_bucket_size;
+ if (zone->uz_dtor != NULL || zone->uz_ctor != NULL)
+ zone->uz_flags |= UMA_ZFLAG_CTORDTOR;
+ zone_update_caches(zone);
return (0);
}
@@ -2801,8 +2814,14 @@ uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *
uma_zfree_arg(zone, item, udata);
}
+#ifdef INVARIANTS
+#define UMA_ALWAYS_CTORDTOR 1
+#else
+#define UMA_ALWAYS_CTORDTOR 0
+#endif
+
static void *
-item_ctor(uma_zone_t zone, void *udata, int flags, void *item)
+item_ctor(uma_zone_t zone, int size, void *udata, int flags, void *item)
{
#ifdef INVARIANTS
bool skipdbg;
@@ -2810,10 +2829,10 @@ item_ctor(uma_zone_t zone, void *udata, int flags, voi
skipdbg = uma_dbg_zskip(zone, item);
if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
zone->uz_ctor != trash_ctor)
- trash_ctor(item, zone->uz_size, udata, flags);
+ trash_ctor(item, size, udata, flags);
#endif
if (__predict_false(zone->uz_ctor != NULL) &&
- zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
+ zone->uz_ctor(item, size, udata, flags) != 0) {
counter_u64_add(zone->uz_fails, 1);
zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
return (NULL);
@@ -2823,13 +2842,14 @@ item_ctor(uma_zone_t zone, void *udata, int flags, voi
uma_dbg_alloc(zone, NULL, item);
#endif
if (flags & M_ZERO)
- uma_zero_item(item, zone);
+ bzero(item, size);
return (item);
}
static inline void
-item_dtor(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
+item_dtor(uma_zone_t zone, void *item, int size, void *udata,
+ enum zfreeskip skip)
{
#ifdef INVARIANTS
bool skipdbg;
@@ -2842,13 +2862,13 @@ item_dtor(uma_zone_t zone, void *item, void *udata, en
uma_dbg_free(zone, NULL, item);
}
#endif
- if (skip < SKIP_DTOR) {
+ if (__predict_true(skip < SKIP_DTOR)) {
if (zone->uz_dtor != NULL)
- zone->uz_dtor(item, zone->uz_size, udata);
+ zone->uz_dtor(item, size, udata);
#ifdef INVARIANTS
if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
zone->uz_dtor != trash_dtor)
- trash_dtor(item, zone->uz_size, udata);
+ trash_dtor(item, size, udata);
#endif
}
}
@@ -2860,7 +2880,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
uma_cache_bucket_t bucket;
uma_cache_t cache;
void *item;
- int cpu, domain;
+ int domain, size, uz_flags;
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
@@ -2869,16 +2889,21 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
curthread, zone->uz_name, zone, flags);
+#ifdef WITNESS
if (flags & M_WAITOK) {
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
"uma_zalloc_arg: zone \"%s\"", zone->uz_name);
}
+#endif
+
+#ifdef INVARIANTS
KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
("uma_zalloc_arg: called with spinlock or critical section held"));
if (zone->uz_flags & UMA_ZONE_PCPU)
KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
"with M_ZERO passed"));
+#endif
#ifdef DEBUG_MEMGUARD
if (memguard_cmp_zone(zone)) {
@@ -2912,13 +2937,19 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
*/
critical_enter();
do {
- cpu = curcpu;
- cache = &zone->uz_cpu[cpu];
+ cache = &zone->uz_cpu[curcpu];
bucket = &cache->uc_allocbucket;
+ size = cache_uz_size(cache);
+ uz_flags = cache_uz_flags(cache);
if (__predict_true(bucket->ucb_cnt != 0)) {
item = cache_bucket_pop(cache, bucket);
critical_exit();
- return (item_ctor(zone, udata, flags, item));
+ if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 ||
+ UMA_ALWAYS_CTORDTOR))
+ return (item_ctor(zone, size, udata, flags, item));
+ if (flags & M_ZERO)
+ bzero(item, size);
+ return (item);
}
} while (cache_alloc(zone, cache, udata, flags));
critical_exit();
@@ -2926,7 +2957,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
/*
* We can not get a bucket so try to return a single item.
*/
- if (zone->uz_flags & UMA_ZONE_NUMA)
+ if (uz_flags & UMA_ZONE_NUMA)
domain = PCPU_GET(domain);
else
domain = UMA_ANYDOMAIN;
@@ -2945,7 +2976,7 @@ cache_alloc(uma_zone_t zone, uma_cache_t cache, void *
{
uma_zone_domain_t zdom;
uma_bucket_t bucket;
- int cpu, domain;
+ int domain;
bool lockfail;
CRITICAL_ASSERT(curthread);
@@ -2988,8 +3019,7 @@ cache_alloc(uma_zone_t zone, uma_cache_t cache, void *
if (zone->uz_bucket_size == 0 || bucketdisable)
return (false);
- cpu = curcpu;
- cache = &zone->uz_cpu[cpu];
+ cache = &zone->uz_cpu[curcpu];
/* See if we lost the race to fill the cache. */
if (cache->uc_allocbucket.ucb_bucket != NULL) {
@@ -3040,8 +3070,7 @@ cache_alloc(uma_zone_t zone, uma_cache_t cache, void *
* initialized bucket to make this less likely or claim
* the memory directly.
*/
- cpu = curcpu;
- cache = &zone->uz_cpu[cpu];
+ cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket.ucb_bucket == NULL &&
((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
domain == PCPU_GET(domain))) {
@@ -3437,7 +3466,7 @@ zone_alloc_item_locked(uma_zone_t zone, void *udata, i
goto fail_cnt;
}
}
- item = item_ctor(zone, udata, flags, item);
+ item = item_ctor(zone, zone->uz_size, udata, flags, item);
if (item == NULL)
goto fail;
@@ -3467,7 +3496,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata
{
uma_cache_t cache;
uma_cache_bucket_t bucket;
- int cpu, domain, itemdomain;
+ int domain, itemdomain, uz_flags;
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
@@ -3491,14 +3520,26 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata
return;
}
#endif
- item_dtor(zone, item, udata, SKIP_NONE);
/*
+ * We are accessing the per-cpu cache without a critical section to
+ * fetch size and flags. This is acceptable, if we are preempted we
+ * will simply read another cpu's line.
+ */
+ cache = &zone->uz_cpu[curcpu];
+ uz_flags = cache_uz_flags(cache);
+ if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 ||
+ UMA_ALWAYS_CTORDTOR))
+ item_dtor(zone, item, cache_uz_size(cache), udata, SKIP_NONE);
+
+ /*
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/
- if (zone->uz_sleepers > 0)
- goto zfree_item;
+ if (__predict_false(uz_flags & UMA_ZFLAG_LIMIT)) {
+ if (zone->uz_sleepers > 0)
+ goto zfree_item;
+ }
/*
* If possible, free to the per-CPU cache. There are two
@@ -3514,16 +3555,14 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata
domain = itemdomain = 0;
critical_enter();
do {
- cpu = curcpu;
- cache = &zone->uz_cpu[cpu];
+ cache = &zone->uz_cpu[curcpu];
bucket = &cache->uc_allocbucket;
#ifdef UMA_XDOMAIN
- if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
+ if ((uz_flags & UMA_ZONE_NUMA) != 0) {
itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
domain = PCPU_GET(domain);
}
- if ((zone->uz_flags & UMA_ZONE_NUMA) != 0 &&
- domain != itemdomain) {
+ if ((uz_flags & UMA_ZONE_NUMA) != 0 && domain != itemdomain) {
bucket = &cache->uc_crossbucket;
} else
#endif
@@ -3615,15 +3654,14 @@ cache_free(uma_zone_t zone, uma_cache_t cache, void *u
int itemdomain)
{
uma_bucket_t bucket;
- int cpu, domain;
+ int domain;
CRITICAL_ASSERT(curthread);
if (zone->uz_bucket_size == 0 || bucketdisable)
return false;
- cpu = curcpu;
- cache = &zone->uz_cpu[cpu];
+ cache = &zone->uz_cpu[curcpu];
/*
* NUMA domains need to free to the correct zdom. When XDOMAIN
@@ -3660,8 +3698,7 @@ cache_free(uma_zone_t zone, uma_cache_t cache, void *u
critical_enter();
if (bucket == NULL)
return (false);
- cpu = curcpu;
- cache = &zone->uz_cpu[cpu];
+ cache = &zone->uz_cpu[curcpu];
#ifdef UMA_XDOMAIN
/*
* Check to see if we should be populating the cross bucket. If it
@@ -3783,7 +3820,7 @@ static void
zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
{
- item_dtor(zone, item, udata, skip);
+ item_dtor(zone, item, zone->uz_size, udata, skip);
if (skip < SKIP_FINI && zone->uz_fini)
zone->uz_fini(item, zone->uz_size);
@@ -3819,6 +3856,8 @@ uma_zone_set_max(uma_zone_t zone, int nitems)
if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
zone->uz_bucket_size_min = zone->uz_bucket_size_max;
zone->uz_max_items = nitems;
+ zone->uz_flags |= UMA_ZFLAG_LIMIT;
+ zone_update_caches(zone);
ZONE_UNLOCK(zone);
return (nitems);
@@ -4086,7 +4125,9 @@ uma_zone_reserve_kva(uma_zone_t zone, int count)
#else
keg->uk_allocf = noobj_alloc;
#endif
- keg->uk_flags |= UMA_ZONE_NOFREE;
+ keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
+ zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
+ zone_update_caches(zone);
ZONE_UNLOCK(zone);
return (1);
@@ -4231,13 +4272,6 @@ int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
return (zone->uz_sleepers > 0);
-}
-
-static void
-uma_zero_item(void *item, uma_zone_t zone)
-{
-
- bzero(item, zone->uz_size);
}
unsigned long
Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h Wed Dec 25 20:50:53 2019 (r356080)
+++ head/sys/vm/uma_int.h Wed Dec 25 20:57:24 2019 (r356081)
@@ -218,6 +218,39 @@ typedef struct uma_cache * uma_cache_t;
LIST_HEAD(slabhead, uma_slab);
/*
+ * The cache structure pads perfectly into 64 bytes so we use spare
+ * bits from the embedded cache buckets to store information from the zone
+ * and keep all fast-path allocations accessing a single per-cpu line.
+ */
+static inline void
+cache_set_uz_flags(uma_cache_t cache, uint32_t flags)
+{
+
+ cache->uc_freebucket.ucb_spare = flags;
+}
+
+static inline void
+cache_set_uz_size(uma_cache_t cache, uint32_t size)
+{
+
+ cache->uc_allocbucket.ucb_spare = size;
+}
+
+static inline uint32_t
+cache_uz_flags(uma_cache_t cache)
+{
+
+ return (cache->uc_freebucket.ucb_spare);
+}
+
+static inline uint32_t
+cache_uz_size(uma_cache_t cache)
+{
+
+ return (cache->uc_allocbucket.ucb_spare);
+}
+
+/*
* Per-domain slab lists. Embedded in the kegs.
*/
struct uma_domain {
@@ -442,6 +475,8 @@ struct uma_zone {
/*
* These flags must not overlap with the UMA_ZONE flags specified in uma.h.
*/
+#define UMA_ZFLAG_CTORDTOR 0x01000000 /* Zone has ctor/dtor set. */
+#define UMA_ZFLAG_LIMIT 0x02000000 /* Zone has limit set. */
#define UMA_ZFLAG_CACHE 0x04000000 /* uma_zcache_create()d it */
#define UMA_ZFLAG_RECLAIMING 0x08000000 /* Running zone_reclaim(). */
#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */
@@ -459,6 +494,8 @@ struct uma_zone {
"\35BUCKET" \
"\34RECLAIMING" \
"\33CACHE" \
+ "\32LIMIT" \
+ "\31CTORDTOR" \
"\22MINBUCKET" \
"\21NUMA" \
"\20PCPU" \
More information about the svn-src-head
mailing list