svn commit: r329894 - user/jeff/numa/sys/vm
Jeff Roberson
jeff at FreeBSD.org
Sat Feb 24 02:52:41 UTC 2018
Author: jeff
Date: Sat Feb 24 02:52:39 2018
New Revision: 329894
URL: https://svnweb.freebsd.org/changeset/base/329894
Log:
Fine-grained locking for reservations. This permits us to free a page to a
reservation without the domain free lock held. It further reduces the scope of
the free lock, which now protects only the physical free queues and
allocations charged against the free count.
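As a condensed sketch (not code from this diff) of the resulting lock scopes: the page lock now doubles as the reservation lock, and the domain free lock is taken only around the buddy-queue operation itself:

    /*
     * Simplified sketch of the free path after this change.  The page
     * lock stands in for the reservation lock, so a page can be
     * returned to its reservation without the free lock.
     */
    vm_page_lock(m);
    if (!vm_reserv_free_page(m)) {  /* TRUE: a reservation absorbed m */
        vmd = VM_DOMAIN(vm_phys_domain(m));
        vm_domain_free_lock(vmd);   /* covers only the vm_phys queues */
        vm_phys_free_pages(m, 0);
        vm_domain_free_unlock(vmd);
        vm_domain_freecnt_inc(vmd, 1);  /* reservation path does this
                                           itself in depopulate */
    }
    vm_page_unlock(m);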
Modified:
user/jeff/numa/sys/vm/vm_kern.c
user/jeff/numa/sys/vm/vm_page.c
user/jeff/numa/sys/vm/vm_pageout.c
user/jeff/numa/sys/vm/vm_reserv.c
user/jeff/numa/sys/vm/vm_reserv.h
Modified: user/jeff/numa/sys/vm/vm_kern.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_kern.c Sat Feb 24 02:08:18 2018 (r329893)
+++ user/jeff/numa/sys/vm/vm_kern.c Sat Feb 24 02:52:39 2018 (r329894)
@@ -519,7 +519,9 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_
for (; offset < end; offset += PAGE_SIZE, m = next) {
next = vm_page_next(m);
vm_page_unwire(m, PQ_NONE);
+ vm_page_lock(m);
vm_page_free(m);
+ vm_page_unlock(m);
}
VM_OBJECT_WUNLOCK(object);
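vm_page_free() must now be called with the page locked because the free path may hand the page back to its reservation, and the reservation lock is keyed to the page lock (page locks hash at superpage granularity, so any page of a reservation maps to the same mutex as its first page). A caller-side sketch of the pattern _kmem_unback() adopts above:

    /* Sketch: releasing one kmem-backed page under the new rules. */
    vm_page_unwire(m, PQ_NONE);
    vm_page_lock(m);        /* also serves as the reservation lock */
    vm_page_free(m);
    vm_page_unlock(m);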
Modified: user/jeff/numa/sys/vm/vm_page.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.c Sat Feb 24 02:08:18 2018 (r329893)
+++ user/jeff/numa/sys/vm/vm_page.c Sat Feb 24 02:52:39 2018 (r329894)
@@ -1758,9 +1758,12 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pind
again:
m = NULL;
#if VM_NRESERVLEVEL > 0
+ /*
+ * Can we allocate the page from a reservation?
+ */
if (vm_object_reserv(object) &&
- (m = vm_reserv_extend(req, object, pindex, domain, mpred))
- != NULL) {
+ ((m = vm_reserv_extend(req, object, pindex, domain, mpred)) != NULL ||
+ (m = vm_reserv_alloc_page(req, object, pindex, domain, mpred)) != NULL)) {
domain = vm_phys_domain(m);
vmd = VM_DOMAIN(domain);
goto found;
@@ -1776,32 +1779,18 @@ again:
vm_domain_free_lock(vmd);
if (vm_domain_available(vmd, req, 1)) {
/*
- * Can we allocate the page from a reservation?
+ * If not, allocate it from the free page queues.
*/
-#if VM_NRESERVLEVEL > 0
- if (!vm_object_reserv(object) ||
- (m = vm_reserv_alloc_page(object, pindex,
- domain, mpred)) == NULL)
-#endif
- {
- /*
- * If not, allocate it from the free page queues.
- */
- m = vm_phys_alloc_pages(domain, object != NULL ?
- VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
-#if VM_NRESERVLEVEL > 0
- if (m == NULL && vm_reserv_reclaim_inactive(domain)) {
- m = vm_phys_alloc_pages(domain,
- object != NULL ?
- VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
- 0);
- }
-#endif
- }
+ m = vm_phys_alloc_pages(domain, object != NULL ?
+ VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
+ if (m != NULL)
+ vm_domain_freecnt_dec(vmd, 1);
}
- if (m != NULL)
- vm_domain_freecnt_dec(vmd, 1);
vm_domain_free_unlock(vmd);
+#if VM_NRESERVLEVEL > 0
+ if (m == NULL && vm_reserv_reclaim_inactive(domain))
+ goto again;
+#endif
if (m == NULL) {
/*
* Not allocatable, give up.
@@ -1973,9 +1962,14 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin
*/
again:
#if VM_NRESERVLEVEL > 0
+ /*
+ * Can we allocate the pages from a reservation?
+ */
if (vm_object_reserv(object) &&
- (m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
- npages, low, high, alignment, boundary, mpred)) != NULL) {
+ ((m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
+ npages, low, high, alignment, boundary, mpred)) != NULL ||
+ (m_ret = vm_reserv_alloc_contig(req, object, pindex, domain,
+ npages, low, high, alignment, boundary, mpred)) != NULL)) {
domain = vm_phys_domain(m_ret);
vmd = VM_DOMAIN(domain);
goto found;
@@ -1986,28 +1980,20 @@ again:
vm_domain_free_lock(vmd);
if (vm_domain_available(vmd, req, npages)) {
/*
- * Can we allocate the pages from a reservation?
+ * allocate them from the free page queues.
*/
-#if VM_NRESERVLEVEL > 0
-retry:
- if (!vm_object_reserv(object) ||
- (m_ret = vm_reserv_alloc_contig(object, pindex, domain,
- npages, low, high, alignment, boundary, mpred)) == NULL)
-#endif
- /*
- * If not, allocate them from the free page queues.
- */
- m_ret = vm_phys_alloc_contig(domain, npages, low, high,
- alignment, boundary);
-#if VM_NRESERVLEVEL > 0
- if (m_ret == NULL && vm_reserv_reclaim_contig(
- domain, npages, low, high, alignment, boundary))
- goto retry;
-#endif
+ m_ret = vm_phys_alloc_contig(domain, npages, low, high,
+ alignment, boundary);
+ if (m_ret != NULL)
+ vm_domain_freecnt_dec(vmd, npages);
}
- if (m_ret != NULL)
- vm_domain_freecnt_dec(vmd, npages);
vm_domain_free_unlock(vmd);
+#if VM_NRESERVLEVEL > 0
+ if (m_ret == NULL &&
+ vm_reserv_reclaim_contig(domain, npages, low, high, alignment,
+ boundary))
+ goto again;
+#endif
if (m_ret == NULL) {
if (vm_domain_alloc_fail(vmd, object, req))
goto again;
@@ -2222,10 +2208,6 @@ vm_page_release(void *arg, void **store, int cnt)
vm_domain_free_lock(vmd);
for (i = 0; i < cnt; i++) {
m = (vm_page_t)store[i];
-#if VM_NRESERVLEVEL > 0
- KASSERT(vm_reserv_free_page(m) == false,
- ("vm_page_release: Cached page belonged to reservation."));
-#endif
vm_phys_free_pages(m, 0);
}
vm_domain_free_unlock(vmd);
@@ -2595,15 +2577,18 @@ retry:
KASSERT(m->dirty == 0,
("page %p is dirty", m));
}
- SLIST_INSERT_HEAD(&free, m, plinks.s.ss);
+#if VM_NRESERVLEVEL > 0
+ if (!vm_reserv_free_page(m))
+#endif
+ SLIST_INSERT_HEAD(&free, m,
+ plinks.s.ss);
} else
error = EBUSY;
unlock:
VM_OBJECT_WUNLOCK(object);
} else {
MPASS(vm_phys_domain(m) == domain);
- vmd = VM_DOMAIN(domain);
- vm_domain_free_lock(vmd);
+ vm_page_lock(m);
order = m->order;
if (order < VM_NFREEORDER) {
/*
@@ -2620,7 +2605,7 @@ unlock:
else if (vm_reserv_is_page_free(m))
order = 0;
#endif
- vm_domain_free_unlock(vmd);
+ vm_page_unlock(m);
if (order == VM_NFREEORDER)
error = EINVAL;
}
@@ -3278,6 +3263,10 @@ vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
*/
if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
+#if VM_NRESERVLEVEL > 0
+ if (vm_reserv_free_page(m))
+ return (false);
+#endif
return (true);
}
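A condensed sketch (not part of the diff) of the reworked control flow in vm_page_alloc_domain_after(): the reservation layer is consulted first with no free lock held, the free lock bounds only the vm_phys allocation and the free-count decrement, and reservation reclaim runs unlocked before retrying. "pool" below is a stand-in for the freepool selection in the real code:

    again:
        /* 1. Reservation layer first, under the reservation (page) lock. */
        if (vm_object_reserv(object) &&
            ((m = vm_reserv_extend(req, object, pindex, domain, mpred)) != NULL ||
             (m = vm_reserv_alloc_page(req, object, pindex, domain, mpred)) != NULL))
                goto found;
        /* 2. Free queues; the free lock covers only this block. */
        vm_domain_free_lock(vmd);
        if (vm_domain_available(vmd, req, 1) &&
            (m = vm_phys_alloc_pages(domain, pool, 0)) != NULL)
                vm_domain_freecnt_dec(vmd, 1);
        vm_domain_free_unlock(vmd);
        /* 3. Break a partially populated reservation and retry, unlocked. */
        if (m == NULL && vm_reserv_reclaim_inactive(domain))
                goto again;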
Modified: user/jeff/numa/sys/vm/vm_pageout.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_pageout.c Sat Feb 24 02:08:18 2018 (r329893)
+++ user/jeff/numa/sys/vm/vm_pageout.c Sat Feb 24 02:52:39 2018 (r329894)
@@ -1107,16 +1107,14 @@ vm_pageout_pglist_init(struct pgo_pglist *pglist)
pglist->count = 0;
}
-static bool
+static void
vm_pageout_pglist_append(struct pgo_pglist *pglist, vm_page_t m)
{
if (vm_page_free_prep(m, false)) {
m->flags &= ~PG_ZERO;
TAILQ_INSERT_TAIL(&pglist->pgl, m, listq);
pglist->count++;
- return (true);
}
- return (false);
}
static void
@@ -1139,11 +1137,10 @@ vm_pageout_free_pages(struct pgo_pglist *pglist, vm_ob
int pcount, count;
pcount = MAX(object->iosize / PAGE_SIZE, 1);
+ mtx = vm_page_lockptr(m);
count = 1;
- if (pcount == 1 || vm_object_reserv(object)) {
- vm_page_free(m);
- vm_page_unlock(m);
- VM_OBJECT_WUNLOCK(object);
+ if (pcount == 1) {
+ vm_pageout_pglist_append(pglist, m);
goto out;
}
@@ -1153,7 +1150,6 @@ vm_pageout_free_pages(struct pgo_pglist *pglist, vm_ob
p = pp);
/* Free the original page so we don't validate it twice. */
- mtx = vm_page_lockptr(m);
if (p == m)
p = vm_page_next(m);
vm_pageout_pglist_append(pglist, m);
@@ -1184,13 +1180,13 @@ vm_pageout_free_pages(struct pgo_pglist *pglist, vm_ob
if (m->dirty)
continue;
free_page:
- if (vm_pageout_pglist_append(pglist, m))
- count++;
+ vm_pageout_pglist_append(pglist, m);
+ count++;
}
+out:
mtx_unlock(mtx);
VM_OBJECT_WUNLOCK(object);
vm_pageout_pglist_flush(pglist, false);
-out:
VM_CNT_ADD(v_dfree, count);
return (count);
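vm_pageout_pglist_append() now returns void: a page absorbed by a reservation in vm_page_free_prep() simply never lands on the batch list, so callers no longer branch on the result and count pages unconditionally. A minimal usage sketch of the batching helpers, assumed from the code above:

    struct pgo_pglist pglist;

    vm_pageout_pglist_init(&pglist);
    vm_pageout_pglist_append(&pglist, m);   /* queued only if
                                               vm_page_free_prep() says to */
    vm_pageout_pglist_flush(&pglist, false);  /* frees the batch to vm_phys */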
Modified: user/jeff/numa/sys/vm/vm_reserv.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_reserv.c Sat Feb 24 02:08:18 2018 (r329893)
+++ user/jeff/numa/sys/vm/vm_reserv.c Sat Feb 24 02:52:39 2018 (r329894)
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/counter.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
@@ -167,22 +169,35 @@ popmap_is_set(popmap_t popmap[], int i)
*
* A partially populated reservation can be broken and reclaimed at any time.
*
- * f - vm_domain_free_lock
+ * r - vm_reserv_lock
+ * d - vm_reserv_domain_lock
* o - vm_reserv_object_lock
* c - constant after boot
*/
struct vm_reserv {
- TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. */
- LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */
- vm_object_t object; /* (o, f) containing object */
- vm_pindex_t pindex; /* (o, f) offset in object */
+ TAILQ_ENTRY(vm_reserv) partpopq; /* (d) per-domain queue. */
+ LIST_ENTRY(vm_reserv) objq; /* (o, r) object queue */
+ vm_object_t object; /* (o, r) containing object */
+ vm_pindex_t pindex; /* (o, r) offset in object */
vm_page_t pages; /* (c) first page */
int domain; /* (c) NUMA domain. */
- int popcnt; /* (f) # of pages in use */
- char inpartpopq; /* (f) */
- popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */
+ int popcnt; /* (r) # of pages in use */
+ char inpartpopq; /* (d) */
+ popmap_t popmap[NPOPMAP]; /* (r) bit vector, used pages */
};
+#define vm_reserv_assert_locked(rv) vm_page_assert_locked((rv)->pages)
+#define vm_reserv_lockptr(rv) vm_page_lockptr((rv)->pages)
+#define vm_reserv_lock(rv) vm_page_lock((rv)->pages)
+#define vm_reserv_trylock(rv) vm_page_trylock((rv)->pages)
+#define vm_reserv_unlock(rv) vm_page_unlock((rv)->pages)
+
+static struct mtx_padalign vm_reserv_domain_locks[MAXMEMDOM];
+
+#define vm_reserv_domain_lockptr(d) &vm_reserv_domain_locks[(d)]
+#define vm_reserv_domain_lock(d) mtx_lock(vm_reserv_domain_lockptr(d))
+#define vm_reserv_domain_unlock(d) mtx_unlock(vm_reserv_domain_lockptr(d))
+
/*
* The reservation array
*
@@ -219,13 +234,13 @@ static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDO
static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
-static long vm_reserv_broken;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
- &vm_reserv_broken, 0, "Cumulative number of broken reservations");
+static counter_u64_t vm_reserv_broken = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
+ &vm_reserv_broken, "Cumulative number of broken reservations");
-static long vm_reserv_freed;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
- &vm_reserv_freed, 0, "Cumulative number of freed reservations");
+static counter_u64_t vm_reserv_freed = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
+ &vm_reserv_freed, "Cumulative number of freed reservations");
static int sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS);
@@ -237,9 +252,9 @@ static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_AR
SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
sysctl_vm_reserv_partpopq, "A", "Partially populated reservation queues");
-static long vm_reserv_reclaimed;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
- &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
+static counter_u64_t vm_reserv_reclaimed = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
+ &vm_reserv_reclaimed, "Cumulative number of reclaimed reservations");
/*
* The object lock pool is used to synchronize the rvq. We can not use a
@@ -314,12 +329,12 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
counter = 0;
unused_pages = 0;
- vm_domain_free_lock(VM_DOMAIN(domain));
+ vm_reserv_domain_lock(domain);
TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
counter++;
unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
}
- vm_domain_free_unlock(VM_DOMAIN(domain));
+ vm_reserv_domain_unlock(domain);
sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
domain, level,
unused_pages * ((int)PAGE_SIZE / 1024), counter);
@@ -338,6 +353,9 @@ vm_reserv_remove(vm_reserv_t rv)
{
vm_object_t object;
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
KASSERT(rv->object != NULL,
("vm_reserv_remove: reserv %p is free", rv));
KASSERT(!rv->inpartpopq,
@@ -357,6 +375,11 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, v
{
int i;
+ vm_reserv_assert_locked(rv);
+ CTR6(KTR_VM,
+ "%s: rv %p(%p) object %p new %p popcnt %d",
+ __FUNCTION__, rv, rv->pages, rv->object, object,
+ rv->popcnt);
KASSERT(rv->object == NULL,
("vm_reserv_insert: reserv %p isn't free", rv));
KASSERT(rv->popcnt == 0,
@@ -378,14 +401,15 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, v
* becomes zero, the reservation is destroyed. Additionally, moves the
* reservation to the tail of the partially populated reservation queue if the
* population count is non-zero.
- *
- * The free page queue lock must be held.
*/
static void
vm_reserv_depopulate(vm_reserv_t rv, int index)
{
+ struct vm_domain *vmd;
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
KASSERT(rv->object != NULL,
("vm_reserv_depopulate: reserv %p is free", rv));
KASSERT(popmap_is_set(rv->popmap, index),
@@ -396,10 +420,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
rv, rv->domain));
- if (rv->inpartpopq) {
- TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
- rv->inpartpopq = FALSE;
- } else {
+ if (rv->popcnt == VM_LEVEL_0_NPAGES) {
KASSERT(rv->pages->psind == 1,
("vm_reserv_depopulate: reserv %p is already demoted",
rv));
@@ -407,14 +428,25 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
}
popmap_clear(rv->popmap, index);
rv->popcnt--;
+ vm_reserv_domain_lock(rv->domain);
+ if (rv->inpartpopq) {
+ TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
+ rv->inpartpopq = FALSE;
+ }
+ if (rv->popcnt != 0) {
+ rv->inpartpopq = TRUE;
+ TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
+ }
+ vm_reserv_domain_unlock(rv->domain);
+ vmd = VM_DOMAIN(rv->domain);
if (rv->popcnt == 0) {
vm_reserv_remove(rv);
+ vm_domain_free_lock(vmd);
vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
- vm_reserv_freed++;
- } else {
- rv->inpartpopq = TRUE;
- TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
+ vm_domain_free_unlock(vmd);
+ counter_u64_add(vm_reserv_freed, 1);
}
+ vm_domain_freecnt_inc(vmd, 1);
}
/*
@@ -423,8 +455,20 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
static __inline vm_reserv_t
vm_reserv_from_page(vm_page_t m)
{
+ vm_reserv_t rv;
- return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
+ rv = &vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT];
+#if 0
+ if (rv->pages == NULL)
+ panic("vm_reserv_from_page: Bad reservation %p page %p phys %p segind %d start %p end %p first page %p domain %d\n",
+ rv, m, (void *)m->phys_addr, m->segind,
+ (void *)vm_phys_segs[m->segind].start,
+ (void *)vm_phys_segs[m->segind].end,
+ vm_phys_segs[m->segind].first_page,
+ vm_phys_segs[m->segind].domain);
+#endif
+
+ return (rv);
}
/*
@@ -485,7 +529,9 @@ static void
vm_reserv_populate(vm_reserv_t rv, int index)
{
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
KASSERT(rv->object != NULL,
("vm_reserv_populate: reserv %p is free", rv));
KASSERT(popmap_is_clear(rv->popmap, index),
@@ -498,17 +544,23 @@ vm_reserv_populate(vm_reserv_t rv, int index)
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
("vm_reserv_populate: reserv %p's domain is corrupted %d",
rv, rv->domain));
+ popmap_set(rv->popmap, index);
+ rv->popcnt++;
+ vm_reserv_domain_lock(rv->domain);
if (rv->inpartpopq) {
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
}
- popmap_set(rv->popmap, index);
- rv->popcnt++;
if (rv->popcnt < VM_LEVEL_0_NPAGES) {
rv->inpartpopq = TRUE;
TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
- } else
+ } else {
+ KASSERT(rv->pages->psind == 0,
+ ("vm_reserv_populate: reserv %p is already promoted",
+ rv));
rv->pages->psind = 1;
+ }
+ vm_reserv_domain_unlock(rv->domain);
}
/*
@@ -578,31 +630,34 @@ vm_reserv_extend_contig(int req, vm_object_t object, v
return (NULL);
domain = rv->domain;
vmd = VM_DOMAIN(domain);
- vm_domain_free_lock(vmd);
- if (rv->object != object || !vm_domain_available(vmd, req, npages)) {
- m = NULL;
+ vm_reserv_lock(rv);
+ if (rv->object != object)
goto out;
- }
m = &rv->pages[index];
pa = VM_PAGE_TO_PHYS(m);
if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
- ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
- m = NULL;
+ ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
goto out;
- }
/* Handle vm_page_rename(m, new_object, ...). */
for (i = 0; i < npages; i++) {
- if (popmap_is_set(rv->popmap, index + i)) {
- m = NULL;
+ if (popmap_is_set(rv->popmap, index + i))
goto out;
- }
}
- for (i = 0; i < npages; i++)
- vm_reserv_populate(rv, index + i);
+ vm_domain_free_lock(vmd);
+ if (!vm_domain_available(vmd, req, npages)) {
+ vm_domain_free_unlock(vmd);
+ goto out;
+ }
vm_domain_freecnt_dec(vmd, npages);
-out:
vm_domain_free_unlock(vmd);
+ for (i = 0; i < npages; i++)
+ vm_reserv_populate(rv, index + i);
+ vm_reserv_unlock(rv);
return (m);
+
+out:
+ vm_reserv_unlock(rv);
+ return (NULL);
}
/*
@@ -621,10 +676,11 @@ out:
* The object and free page queue must be locked.
*/
vm_page_t
-vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
+vm_reserv_alloc_contig(int req, vm_object_t object, vm_pindex_t pindex, int domain,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred)
{
+ struct vm_domain *vmd;
vm_paddr_t pa, size;
vm_page_t m, m_ret, msucc;
vm_pindex_t first, leftcap, rightcap;
@@ -632,7 +688,6 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
u_long allocpages, maxpages, minpages;
int i, index, n;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
@@ -740,8 +795,16 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
* specified index may not be the first page within the first new
* reservation.
*/
- m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
- VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
+ m = NULL;
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, allocpages))
+ m = vm_phys_alloc_contig(domain, allocpages, low, high,
+ ulmax(alignment, VM_LEVEL_0_SIZE),
+ boundary > VM_LEVEL_0_SIZE ? boundary : 0);
+ if (m != NULL)
+ vm_domain_freecnt_dec(vmd, allocpages);
+ vm_domain_free_unlock(vmd);
if (m == NULL)
return (NULL);
KASSERT(vm_phys_domain(m) == domain,
@@ -760,6 +823,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
KASSERT(rv->pages == m,
("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
rv));
+ vm_reserv_lock(rv);
vm_reserv_insert(rv, object, first);
n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
for (i = 0; i < n; i++)
@@ -769,6 +833,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
m_ret = &rv->pages[index];
index = 0;
}
+ vm_reserv_unlock(rv);
m += VM_LEVEL_0_NPAGES;
first += VM_LEVEL_0_NPAGES;
allocpages -= VM_LEVEL_0_NPAGES;
@@ -816,18 +881,25 @@ vm_reserv_extend(int req, vm_object_t object, vm_pinde
vmd = VM_DOMAIN(domain);
index = VM_RESERV_INDEX(object, pindex);
m = &rv->pages[index];
- vm_domain_free_lock(vmd);
- if (vm_domain_available(vmd, req, 1) == 0 ||
- /* Handle reclaim race. */
- rv->object != object ||
+ vm_reserv_lock(rv);
+ /* Handle reclaim race. */
+ if (rv->object != object ||
/* Handle vm_page_rename(m, new_object, ...). */
- popmap_is_set(rv->popmap, index))
+ popmap_is_set(rv->popmap, index)) {
m = NULL;
+ goto out;
+ }
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, 1) == 0)
+ m = NULL;
+ else
+ vm_domain_freecnt_dec(vmd, 1);
+ vm_domain_free_unlock(vmd);
if (m != NULL) {
vm_reserv_populate(rv, index);
- vm_domain_freecnt_dec(vmd, 1);
}
- vm_domain_free_unlock(vmd);
+out:
+ vm_reserv_unlock(rv);
return (m);
}
@@ -841,15 +913,15 @@ vm_reserv_extend(int req, vm_object_t object, vm_pinde
* The object and free page queue must be locked.
*/
vm_page_t
-vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
+vm_reserv_alloc_page(int req, vm_object_t object, vm_pindex_t pindex, int domain,
vm_page_t mpred)
{
+ struct vm_domain *vmd;
vm_page_t m, msucc;
vm_pindex_t first, leftcap, rightcap;
vm_reserv_t rv;
int index;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
/*
@@ -918,15 +990,27 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
/*
* Allocate and populate the new reservation.
*/
- m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
+ m = NULL;
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, VM_LEVEL_0_ORDER)) {
+ m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
+ VM_LEVEL_0_ORDER);
+ if (m != NULL)
+ vm_domain_freecnt_dec(vmd, 1);
+ }
+ vm_domain_free_unlock(vmd);
if (m == NULL)
return (NULL);
rv = vm_reserv_from_page(m);
+ vm_reserv_lock(rv);
KASSERT(rv->pages == m,
("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
vm_reserv_insert(rv, object, first);
index = VM_RESERV_INDEX(object, pindex);
vm_reserv_populate(rv, index);
+ vm_reserv_unlock(rv);
+
return (&rv->pages[index]);
}
@@ -943,7 +1027,9 @@ vm_reserv_break(vm_reserv_t rv)
{
int begin_zeroes, hi, i, lo;
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
vm_reserv_remove(rv);
rv->pages->psind = 0;
i = hi = 0;
@@ -982,12 +1068,14 @@ vm_reserv_break(vm_reserv_t rv)
if (i != NPOPMAP)
/* Convert from ffsl() to ordinary bit numbering. */
hi--;
+ vm_domain_free_lock(VM_DOMAIN(rv->domain));
vm_phys_free_contig(&rv->pages[begin_zeroes], NBPOPMAP * i +
hi - begin_zeroes);
+ vm_domain_free_unlock(VM_DOMAIN(rv->domain));
} while (i < NPOPMAP);
KASSERT(rv->popcnt == 0,
("vm_reserv_break: reserv %p's popcnt is corrupted", rv));
- vm_reserv_broken++;
+ counter_u64_add(vm_reserv_broken, 1);
}
/*
@@ -997,7 +1085,6 @@ void
vm_reserv_break_all(vm_object_t object)
{
vm_reserv_t rv;
- struct vm_domain *vmd;
/*
* This access of object->rvq is unsynchronized so that the
@@ -1006,27 +1093,22 @@ vm_reserv_break_all(vm_object_t object)
* lock prevents new additions, so we are guaranteed that when
* it returns NULL the object is properly empty.
*/
- vmd = NULL;
while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
- if (vmd != VM_DOMAIN(rv->domain)) {
- if (vmd != NULL)
- vm_domain_free_unlock(vmd);
- vmd = VM_DOMAIN(rv->domain);
- vm_domain_free_lock(vmd);
- }
+ vm_reserv_lock(rv);
/* Reclaim race. */
- if (rv->object != object)
+ if (rv->object != object) {
+ vm_reserv_unlock(rv);
continue;
- KASSERT(rv->object == object,
- ("vm_reserv_break_all: reserv %p is corrupted", rv));
+ }
+ vm_reserv_domain_lock(rv->domain);
if (rv->inpartpopq) {
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
}
+ vm_reserv_domain_unlock(rv->domain);
vm_reserv_break(rv);
+ vm_reserv_unlock(rv);
}
- if (vmd != NULL)
- vm_domain_free_unlock(vmd);
}
/*
@@ -1043,7 +1125,6 @@ vm_reserv_free_page(vm_page_t m)
rv = vm_reserv_from_page(m);
if (rv->object == NULL)
return (FALSE);
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
vm_reserv_depopulate(rv, m - rv->pages);
return (TRUE);
}
@@ -1076,8 +1157,11 @@ vm_reserv_init(void)
paddr += VM_LEVEL_0_SIZE;
}
}
- for (i = 0; i < MAXMEMDOM; i++)
+ for (i = 0; i < MAXMEMDOM; i++) {
+ mtx_init(&vm_reserv_domain_locks[i], "VM reserv domain", NULL,
+ MTX_DEF);
TAILQ_INIT(&vm_rvq_partpop[i]);
+ }
}
/*
@@ -1092,7 +1176,7 @@ vm_reserv_is_page_free(vm_page_t m)
rv = vm_reserv_from_page(m);
if (rv->object == NULL)
return (false);
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
return (popmap_is_clear(rv->popmap, m - rv->pages));
}
@@ -1132,7 +1216,10 @@ static void
vm_reserv_reclaim(vm_reserv_t rv)
{
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
+ vm_reserv_domain_lock(rv->domain);
KASSERT(rv->inpartpopq,
("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
@@ -1140,8 +1227,9 @@ vm_reserv_reclaim(vm_reserv_t rv)
rv, rv->domain));
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
+ vm_reserv_domain_unlock(rv->domain);
vm_reserv_break(rv);
- vm_reserv_reclaimed++;
+ counter_u64_add(vm_reserv_reclaimed, 1);
}
/*
@@ -1156,9 +1244,14 @@ vm_reserv_reclaim_inactive(int domain)
{
vm_reserv_t rv;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
- if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
+ while ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
+ vm_reserv_lock(rv);
+ if (rv != TAILQ_FIRST(&vm_rvq_partpop[domain])) {
+ vm_reserv_unlock(rv);
+ continue;
+ }
vm_reserv_reclaim(rv);
+ vm_reserv_unlock(rv);
return (TRUE);
}
return (FALSE);
@@ -1177,14 +1270,16 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
vm_paddr_t pa, size;
- vm_reserv_t rv;
+ vm_reserv_t rv, rvn;
int hi, i, lo, low_index, next_free;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (npages > VM_LEVEL_0_NPAGES - 1)
return (FALSE);
size = npages << PAGE_SHIFT;
- TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
+ vm_reserv_domain_lock(domain);
+again:
+ for (rv = TAILQ_FIRST(&vm_rvq_partpop[domain]); rv != NULL; rv = rvn) {
+ rvn = TAILQ_NEXT(rv, partpopq);
pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
if (pa + PAGE_SIZE - size < low) {
/* This entire reservation is too low; go to next. */
@@ -1195,6 +1290,17 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
/* This entire reservation is too high; go to next. */
continue;
}
+ if (vm_reserv_trylock(rv) == 0) {
+ vm_reserv_domain_unlock(domain);
+ vm_reserv_lock(rv);
+ if (!rv->inpartpopq) {
+ vm_reserv_domain_lock(domain);
+ if (!rvn->inpartpopq)
+ goto again;
+ continue;
+ }
+ } else
+ vm_reserv_domain_unlock(domain);
if (pa < low) {
/* Start the search for free pages at "low". */
low_index = (low + PAGE_MASK - pa) >> PAGE_SHIFT;
@@ -1240,6 +1346,7 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
if ((NBPOPMAP * i - next_free) * PAGE_SIZE >=
size) {
vm_reserv_reclaim(rv);
+ vm_reserv_unlock(rv);
return (TRUE);
}
hi = ffsl(rv->popmap[i]);
@@ -1250,10 +1357,16 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
if ((NBPOPMAP * i + hi - next_free) * PAGE_SIZE >=
size) {
vm_reserv_reclaim(rv);
+ vm_reserv_unlock(rv);
return (TRUE);
}
} while (i < NPOPMAP);
+ vm_reserv_unlock(rv);
+ vm_reserv_domain_lock(domain);
+ if (!rvn->inpartpopq)
+ goto again;
}
+ vm_reserv_domain_unlock(domain);
return (FALSE);
}
@@ -1271,7 +1384,11 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object,
VM_OBJECT_ASSERT_WLOCKED(new_object);
rv = vm_reserv_from_page(m);
if (rv->object == old_object) {
- vm_domain_free_lock(VM_DOMAIN(rv->domain));
+ vm_reserv_lock(rv);
+ CTR6(KTR_VM,
+ "%s: rv %p object %p new %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, new_object, rv->popcnt,
+ rv->inpartpopq);
if (rv->object == old_object) {
vm_reserv_object_lock(old_object);
rv->object = NULL;
@@ -1283,7 +1400,7 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object,
LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
vm_reserv_object_unlock(new_object);
}
- vm_domain_free_unlock(VM_DOMAIN(rv->domain));
+ vm_reserv_unlock(rv);
}
}
@@ -1342,6 +1459,17 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
*/
return (new_end);
}
+
+static void
+vm_reserv_counter_startup(void)
+{
+
+ vm_reserv_freed = counter_u64_alloc(M_WAITOK);
+ vm_reserv_broken = counter_u64_alloc(M_WAITOK);
+ vm_reserv_reclaimed = counter_u64_alloc(M_WAITOK);
+}
+SYSINIT(vm_reserv_counters, SI_SUB_CPU, SI_ORDER_ANY,
+ vm_reserv_counter_startup, NULL);
/*
* Returns the superpage containing the given page.
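The statistics move from lock-protected longs to per-CPU counter(9) counters so vm_reserv_break() and friends can bump them with no lock at all. counter_u64_alloc() needs a working allocator, so the counters start life as EARLY_COUNTER and are replaced from a SYSINIT, as at the end of the diff above. A standalone sketch of the same pattern with hypothetical names:

    #include <sys/counter.h>

    static counter_u64_t foo_broken = EARLY_COUNTER;
    SYSCTL_COUNTER_U64(_vm, OID_AUTO, foo_broken, CTLFLAG_RD,
        &foo_broken, "Cumulative number of broken foos");

    static void
    foo_counter_startup(void *dummy __unused)
    {
        foo_broken = counter_u64_alloc(M_WAITOK);
    }
    SYSINIT(foo_counters, SI_SUB_CPU, SI_ORDER_ANY,
        foo_counter_startup, NULL);

    /* Hot path: lockless per-CPU increment. */
    counter_u64_add(foo_broken, 1);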
Modified: user/jeff/numa/sys/vm/vm_reserv.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_reserv.h Sat Feb 24 02:08:18 2018 (r329893)
+++ user/jeff/numa/sys/vm/vm_reserv.h Sat Feb 24 02:52:39 2018 (r329894)
@@ -47,14 +47,14 @@
/*
* The following functions are only to be used by the virtual memory system.
*/
-vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
+vm_page_t vm_reserv_alloc_contig(int req, vm_object_t object, vm_pindex_t pindex,
int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_extend_contig(int req, vm_object_t object,
vm_pindex_t pindex, int domain, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred);
-vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
+vm_page_t vm_reserv_alloc_page(int req, vm_object_t object, vm_pindex_t pindex,
int domain, vm_page_t mpred);
vm_page_t vm_reserv_extend(int req, vm_object_t object,
vm_pindex_t pindex, int domain, vm_page_t mpred);
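With the request flags threaded through these prototypes, a hypothetical caller mirrors the two-step attempt now used in vm_page_alloc_domain_after(): extend an existing reservation first, then try to start a new one, before ever touching the free lock:

    m = vm_reserv_extend(req, object, pindex, domain, mpred);
    if (m == NULL)
        m = vm_reserv_alloc_page(req, object, pindex, domain, mpred);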