svn commit: r328097 - in user/jeff/numa/sys: arm/arm fs/tmpfs kern powerpc/booke vm
Jeff Roberson
jeff at FreeBSD.org
Wed Jan 17 22:11:01 UTC 2018
Author: jeff
Date: Wed Jan 17 22:10:58 2018
New Revision: 328097
URL: https://svnweb.freebsd.org/changeset/base/328097
Log:
Fix a reservation locking bug spotted by markj.
Make the reservation object lock an array of locks.
Fix the paging and laundry targets.
Make laundry per-domain.
Fix a compile error on powerpc and arm.
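For illustration, here is a minimal userland sketch of the lock-striping idea behind the "array of locks" item above: an object pointer is hashed to pick one mutex out of a fixed pool, so the same object always maps to the same lock while unrelated objects rarely contend. The pthread locking, the stripe count, and all names below are assumptions made for the sketch; the real macros appear in the vm_reserv.c hunk further down.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stripe count; the commit sizes the kernel array by MAXCPU. */
#define OBJ_LOCK_COUNT 16

struct object { int dummy; };

static pthread_mutex_t obj_locks[OBJ_LOCK_COUNT];

/*
 * Same idea as vm_reserv_object_lock_idx(): divide the address by the
 * object size so neighbouring objects spread out, then take the modulus.
 */
static unsigned int
obj_lock_idx(const struct object *obj)
{
    return (((uintptr_t)obj / sizeof(*obj)) % OBJ_LOCK_COUNT);
}

static void
obj_lock(const struct object *obj)
{
    pthread_mutex_lock(&obj_locks[obj_lock_idx(obj)]);
}

static void
obj_unlock(const struct object *obj)
{
    pthread_mutex_unlock(&obj_locks[obj_lock_idx(obj)]);
}

int
main(void)
{
    struct object objs[4];
    int i;

    for (i = 0; i < OBJ_LOCK_COUNT; i++)
        pthread_mutex_init(&obj_locks[i], NULL);
    for (i = 0; i < 4; i++) {
        obj_lock(&objs[i]);
        printf("object %d -> lock stripe %u\n", i, obj_lock_idx(&objs[i]));
        obj_unlock(&objs[i]);
    }
    return (0);
}

Built with cc -pthread, two threads working on the same object serialize while most other pairs do not; that is the trade-off the lock array buys over a single global vm_reserv_object_mtx.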
Modified:
user/jeff/numa/sys/arm/arm/pmap-v4.c
user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c
user/jeff/numa/sys/kern/subr_witness.c
user/jeff/numa/sys/powerpc/booke/pmap.c
user/jeff/numa/sys/vm/vm_meter.c
user/jeff/numa/sys/vm/vm_page.c
user/jeff/numa/sys/vm/vm_pageout.c
user/jeff/numa/sys/vm/vm_pagequeue.h
user/jeff/numa/sys/vm/vm_reserv.c
Modified: user/jeff/numa/sys/arm/arm/pmap-v4.c
==============================================================================
--- user/jeff/numa/sys/arm/arm/pmap-v4.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/arm/arm/pmap-v4.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -3817,7 +3817,7 @@ pmap_get_pv_entry(void)
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
- pagedaemon_wakeup();
+ pagedaemon_wakeup(0); /* XXX ARM NUMA */
ret_value = uma_zalloc(pvzone, M_NOWAIT);
return ret_value;
}
Modified: user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -106,6 +106,7 @@ tmpfs_mem_avail(void)
{
vm_ooffset_t avail;
+ /* XXX */
avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved;
if (__predict_false(avail < 0))
avail = 0;
Modified: user/jeff/numa/sys/kern/subr_witness.c
==============================================================================
--- user/jeff/numa/sys/kern/subr_witness.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/kern/subr_witness.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -139,7 +139,7 @@ __FBSDID("$FreeBSD$");
#define WITNESS_COUNT 1536
#endif
#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
-#define WITNESS_PENDLIST (2048 + MAXCPU)
+#define WITNESS_PENDLIST (2048 + (MAXCPU * 4))
/* Allocate 256 KB of stack data space */
#define WITNESS_LO_DATA_COUNT 2048
Modified: user/jeff/numa/sys/powerpc/booke/pmap.c
==============================================================================
--- user/jeff/numa/sys/powerpc/booke/pmap.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/powerpc/booke/pmap.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -1183,7 +1183,7 @@ pv_alloc(void)
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
- pagedaemon_wakeup();
+ pagedaemon_wakeup(0); /* XXX powerpc NUMA */
pv = uma_zalloc(pvzone, M_NOWAIT);
return (pv);
Modified: user/jeff/numa/sys/vm/vm_meter.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_meter.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/vm/vm_meter.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -470,7 +470,13 @@ vm_inactive_count(void)
u_int
vm_laundry_count(void)
{
+ u_int v;
+ int i;
- return (vm_dom[0].vmd_pagequeues[PQ_LAUNDRY].pq_cnt);
+ v = 0;
+ for (i = 0; i < vm_ndomains; i++)
+ v += vm_dom[i].vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
+
+ return (v);
}
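The hunk above converts vm_laundry_count() from reporting only domain 0 to summing every domain. A trimmed-down sketch of that aggregation pattern follows; the struct and names here are simplified stand-ins for the kernel's, not the real vm_dom[] layout.

#include <stdio.h>

#define NDOMAINS 2    /* illustrative; the kernel iterates up to vm_ndomains */

/* Simplified stand-in for the per-domain PQ_LAUNDRY queue length. */
struct domain_stats {
    unsigned int laundry_cnt;
};

static struct domain_stats dom[NDOMAINS] = { { 100 }, { 40 } };

/* System-wide view: add up every domain, as vm_laundry_count() now does. */
static unsigned int
laundry_count(void)
{
    unsigned int v;
    int i;

    v = 0;
    for (i = 0; i < NDOMAINS; i++)
        v += dom[i].laundry_cnt;
    return (v);
}

int
main(void)
{
    printf("laundry pages: %u\n", laundry_count());
    return (0);
}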
Modified: user/jeff/numa/sys/vm/vm_page.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/vm/vm_page.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -484,7 +484,7 @@ vm_page_startup(vm_offset_t vaddr)
for (i = 0; i < PA_LOCK_COUNT; i++)
mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
for (i = 0; i < vm_ndomains; i++)
- vm_page_domain_init(&vm_dom[i]);
+ vm_page_domain_init(VM_DOMAIN(i));
/*
* Almost all of the pages needed for bootstrapping UMA are used
@@ -709,7 +709,7 @@ vm_page_startup(vm_offset_t vaddr)
vm_pagequeue_free_unlock(seg->domain);
vm_cnt.v_page_count += (u_int)pagecount;
- vmd = &vm_dom[seg->domain];
+ vmd = VM_DOMAIN(seg->domain);
vmd->vmd_page_count += (u_int)pagecount;
vmd->vmd_segs |= 1UL << m->segind;
break;
@@ -1644,7 +1644,7 @@ vm_page_available(int domain, int req, int npages)
struct vm_domain *vmd;
vm_pagequeue_free_assert_locked(domain);
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
req = req & VM_ALLOC_CLASS_MASK;
/*
@@ -1745,7 +1745,7 @@ again:
* Don't wakeup too often - wakeup the pageout daemon when
* we would be nearly out of memory.
*/
- if (vm_paging_needed(domain, free_count))
+ if (vm_paging_needed(VM_DOMAIN(domain), free_count))
pagedaemon_wakeup(domain);
#if VM_NRESERVLEVEL > 0
found:
@@ -1874,6 +1874,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin
int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
+ struct vm_domain *vmd;
vm_page_t m, m_ret, mpred;
u_int busy_lock, flags, oflags;
#if VM_NRESERVLEVEL > 0
@@ -2016,7 +2017,8 @@ found:
pmap_page_set_memattr(m, memattr);
pindex++;
}
- if (vm_paging_needed(domain, vm_dom[domain].vmd_free_count))
+ vmd = VM_DOMAIN(domain);
+ if (vm_paging_needed(vmd, vmd->vmd_free_count))
pagedaemon_wakeup(domain);
return (m_ret);
}
@@ -2117,7 +2119,7 @@ again:
}
/* Unmanaged pages don't use "act_count". */
m->oflags = VPO_UNMANAGED;
- if (vm_paging_needed(domain, free_count))
+ if (vm_paging_needed(VM_DOMAIN(domain), free_count))
pagedaemon_wakeup(domain);
return (m);
}
@@ -2586,7 +2588,7 @@ vm_page_reclaim_contig_domain(int domain, int req, u_l
* Return if the number of free pages cannot satisfy the requested
* allocation.
*/
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
count = vmd->vmd_free_count;
if (count < npages + vmd->vmd_free_reserved || (count < npages +
vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
@@ -2679,7 +2681,7 @@ vm_wait_domain(int domain)
struct vm_domain *vmd;
vm_pagequeue_free_assert_locked(domain);
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
if (curproc == pageproc) {
vmd->vmd_pageout_pages_needed = 1;
@@ -2720,7 +2722,7 @@ vm_page_alloc_fail(vm_object_t object, int domain, int
vm_pagequeue_free_assert_locked(domain);
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
atomic_add_int(&vmd->vmd_pageout_deficit,
max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
@@ -2763,10 +2765,7 @@ struct vm_pagequeue *
vm_page_pagequeue(vm_page_t m)
{
- if (vm_page_in_laundry(m))
- return (&vm_dom[0].vmd_pagequeues[m->queue]);
- else
- return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
+ return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
}
/*
@@ -2828,10 +2827,7 @@ vm_page_enqueue(uint8_t queue, vm_page_t m)
KASSERT(queue < PQ_COUNT,
("vm_page_enqueue: invalid queue %u request for page %p",
queue, m));
- if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE)
- pq = &vm_dom[0].vmd_pagequeues[queue];
- else
- pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
+ pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
m->queue = queue;
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -2926,7 +2922,7 @@ vm_page_free_wakeup(int domain)
struct vm_domain *vmd;
vm_pagequeue_free_assert_locked(domain);
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
/*
* if pageout daemon needs pages, then tell it that there are
@@ -2942,7 +2938,7 @@ vm_page_free_wakeup(int domain)
* high water mark. And wakeup scheduler process if we have
* lots of memory. this process will swapin processes.
*/
- if (vmd->vmd_pages_needed && !vm_page_count_min()) {
+ if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
vmd->vmd_pages_needed = false;
wakeup(&vmd->vmd_free_count);
}
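The vm_pageout.c hunks below move the laundering request from a single global into struct vm_domain, so each domain's page-out scan wakes its own laundry worker. Here is a hedged userland sketch of that per-domain sleep/wakeup handshake; the pthread mutex and condition variable stand in for the kernel's pagequeue mutex and mtx_sleep()/wakeup() pair, and every name is an assumption made for the sketch.

#include <pthread.h>
#include <stdio.h>

/* Per-domain laundering request, mirroring the enum the commit moves into
 * struct vm_domain. */
enum laundry_request { LAUNDRY_IDLE, LAUNDRY_BACKGROUND, LAUNDRY_SHORTFALL };

struct domain {
    pthread_mutex_t lock;
    pthread_cond_t cv;
    enum laundry_request laundry_request;
};

/* Laundry worker side: sleep until this domain's page-out scan posts work. */
static void *
laundry_worker(void *arg)
{
    struct domain *d = arg;

    pthread_mutex_lock(&d->lock);
    while (d->laundry_request == LAUNDRY_IDLE)
        pthread_cond_wait(&d->cv, &d->lock);
    printf("laundering (%s)\n", d->laundry_request == LAUNDRY_SHORTFALL ?
        "shortfall" : "background");
    d->laundry_request = LAUNDRY_IDLE;
    pthread_mutex_unlock(&d->lock);
    return (NULL);
}

/* Page-out scan side: record the request and wake this domain's worker. */
static void
post_laundry_request(struct domain *d, enum laundry_request req)
{
    pthread_mutex_lock(&d->lock);
    if (d->laundry_request != LAUNDRY_SHORTFALL)
        d->laundry_request = req;
    pthread_cond_signal(&d->cv);
    pthread_mutex_unlock(&d->lock);
}

int
main(void)
{
    struct domain d;
    pthread_t tid;

    d.laundry_request = LAUNDRY_IDLE;
    pthread_mutex_init(&d.lock, NULL);
    pthread_cond_init(&d.cv, NULL);
    pthread_create(&tid, NULL, laundry_worker, &d);
    post_laundry_request(&d, LAUNDRY_SHORTFALL);
    pthread_join(tid, NULL);
    return (0);
}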
Modified: user/jeff/numa/sys/vm/vm_pageout.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_pageout.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/vm/vm_pageout.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -150,14 +150,6 @@ SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
static int vm_pageout_oom_seq = 12;
-/* Pending request for dirty page laundering. */
-static enum {
- VM_LAUNDRY_IDLE,
- VM_LAUNDRY_BACKGROUND,
- VM_LAUNDRY_SHORTFALL
-} vm_laundry_request = VM_LAUNDRY_IDLE;
-static int vm_inactq_scans;
-
static int vm_pageout_update_period;
static int disable_swap_pageouts;
static int lowmem_period = 10;
@@ -958,10 +950,9 @@ vm_pageout_laundry_worker(void *arg)
u_int inactq_scans, last_launder;
int domain, last_target, launder, shortfall, shortfall_cycle, target;
bool in_shortfall;
- int i;
domain = (uintptr_t)arg;
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY);
@@ -1000,7 +991,7 @@ vm_pageout_laundry_worker(void *arg)
target = shortfall;
} else if (!in_shortfall)
goto trybackground;
- else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
+ else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
/*
* We recently entered shortfall and began laundering
* pages. If we have completed that laundering run
@@ -1034,12 +1025,9 @@ vm_pageout_laundry_worker(void *arg)
* memory pressure required to trigger laundering decreases.
*/
trybackground:
- nclean = 0;
- for (i = 0; i < vm_ndomains; i++) {
- nclean += vm_dom[i].vmd_free_count;
- nclean += vm_dom[i].vmd_pagequeues[PQ_INACTIVE].pq_cnt;
- }
- ndirty = vm_laundry_count();
+ nclean = vmd->vmd_free_count +
+ vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
+ ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
if (target == 0 && inactq_scans != last_launder &&
ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
target = vm_background_launder_target;
@@ -1085,8 +1073,8 @@ dolaundry:
* kicks us.
*/
vm_pagequeue_lock(pq);
- if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
- (void)mtx_sleep(&vm_laundry_request,
+ if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
+ (void)mtx_sleep(&vmd->vmd_laundry_request,
vm_pagequeue_lockptr(pq), PVM, "launds", 0);
/*
@@ -1094,17 +1082,17 @@ dolaundry:
* a shortfall laundering unless we're already in the middle of
* one. This may preempt a background laundering.
*/
- if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
+ if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
(!in_shortfall || shortfall_cycle == 0)) {
- shortfall = vm_laundry_target() +
+ shortfall = vm_laundry_target(vmd) +
vmd->vmd_pageout_deficit;
target = 0;
} else
shortfall = 0;
if (target == 0)
- vm_laundry_request = VM_LAUNDRY_IDLE;
- inactq_scans = vm_inactq_scans;
+ vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
+ inactq_scans = vmd->vmd_inactq_scans;
vm_pagequeue_unlock(pq);
}
}
@@ -1133,7 +1121,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* If we need to reclaim memory ask kernel caches to return
* some. We rate limit to avoid thrashing.
*/
- if (vmd == &vm_dom[0] && pass > 0 &&
+ if (vmd == VM_DOMAIN(0) && pass > 0 &&
(time_uptime - lowmem_uptime) >= lowmem_period) {
/*
* Decrease registered cache sizes.
@@ -1356,18 +1344,20 @@ drop_page:
* keep count.
*/
if (starting_page_shortage > 0) {
- pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
vm_pagequeue_lock(pq);
- if (vm_laundry_request == VM_LAUNDRY_IDLE &&
+ if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
(pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
if (page_shortage > 0) {
- vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+ vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
VM_CNT_INC(v_pdshortfalls);
- } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
- vm_laundry_request = VM_LAUNDRY_BACKGROUND;
- wakeup(&vm_laundry_request);
+ } else if (vmd->vmd_laundry_request !=
+ VM_LAUNDRY_SHORTFALL)
+ vmd->vmd_laundry_request =
+ VM_LAUNDRY_BACKGROUND;
+ wakeup(&vmd->vmd_laundry_request);
}
- vm_inactq_scans++;
+ vmd->vmd_inactq_scans++;
vm_pagequeue_unlock(pq);
}
@@ -1397,7 +1387,7 @@ drop_page:
* ensuring that they can eventually be reused.
*/
inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
- /* XXX */vm_laundry_count() / act_scan_laundry_weight) +
+ vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
vm_paging_target(vmd) + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight;
@@ -1751,7 +1741,7 @@ vm_pageout_oom(int shortage)
_PRELE(bigproc);
PROC_UNLOCK(bigproc);
for (i = 0; i < vm_ndomains; i++)
- wakeup(&vm_dom[i].vmd_free_count);
+ wakeup(&VM_DOMAIN(i)->vmd_free_count);
}
}
@@ -1763,7 +1753,7 @@ vm_pageout_worker(void *arg)
bool target_met;
domain = (uintptr_t)arg;
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
pass = 0;
target_met = true;
@@ -1798,7 +1788,7 @@ vm_pageout_worker(void *arg)
* thread will, nonetheless, wait until another page is freed
* or this wakeup is performed.
*/
- if (vmd->vmd_pages_needed && !vm_page_count_min() /* XXX */) {
+ if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
vmd->vmd_pages_needed = false;
wakeup(&vmd->vmd_free_count);
}
@@ -1861,7 +1851,7 @@ vm_pageout_init_domain(int domain)
{
struct vm_domain *vmd;
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
vmd->vmd_interrupt_free_min = 2;
/*
@@ -1909,7 +1899,7 @@ vm_pageout_init(void)
struct vm_domain *vmd;
vm_pageout_init_domain(i);
- vmd = &vm_dom[i];
+ vmd = VM_DOMAIN(i);
vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
vm_cnt.v_free_target += vmd->vmd_free_target;
vm_cnt.v_free_min += vmd->vmd_free_min;
@@ -1961,6 +1951,12 @@ vm_pageout(void)
panic("starting pageout for domain %d, error %d\n",
i, error);
}
+ error = kthread_add(vm_pageout_laundry_worker,
+ (void *)(uintptr_t)i, curproc, NULL, 0, 0,
+ "laundry: dom%d", i);
+ if (error != 0)
+ panic("starting laundry for domain %d, error %d",
+ i, error);
}
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma");
@@ -1978,7 +1974,7 @@ pagedaemon_wakeup(int domain)
struct vm_domain *vmd;
vm_pagequeue_free_assert_unlocked(domain);
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
vmd->vmd_pageout_wanted = true;
@@ -1997,7 +1993,7 @@ pagedaemon_wait(int domain, int pri, const char *wmesg
struct vm_domain *vmd;
vm_pagequeue_free_assert_locked(domain);
- vmd = &vm_dom[domain];
+ vmd = VM_DOMAIN(domain);
/*
* vmd_pageout_wanted may have been set by an advisory wakeup, but if
Modified: user/jeff/numa/sys/vm/vm_pagequeue.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_pagequeue.h Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/vm/vm_pagequeue.h Wed Jan 17 22:10:58 2018 (r328097)
@@ -92,9 +92,13 @@ struct vm_domain {
int vmd_pageout_deficit; /* Estimated number of pages deficit */
bool vmd_pages_needed; /* Are threads waiting for free pages? */
bool vmd_pageout_wanted; /* pageout daemon wait channel */
+ int vmd_inactq_scans;
+ enum {
+ VM_LAUNDRY_IDLE = 0,
+ VM_LAUNDRY_BACKGROUND,
+ VM_LAUNDRY_SHORTFALL
+ } vmd_laundry_request;
-
-
u_int vmd_free_reserved; /* (c) pages reserved for deadlock */
u_int vmd_free_target; /* (c) pages desired free */
u_int vmd_free_min; /* (c) pages desired free */
@@ -107,6 +111,8 @@ struct vm_domain {
extern struct vm_domain vm_dom[MAXMEMDOM];
+#define VM_DOMAIN(n) (&vm_dom[(n)])
+
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
@@ -119,7 +125,7 @@ extern struct vm_domain vm_dom[MAXMEMDOM];
#define vm_pagequeue_free_lock(n) \
mtx_lock(vm_pagequeue_free_lockptr((n)))
#define vm_pagequeue_free_lockptr(n) \
- (&vm_dom[(n)].vmd_pagequeue_free_mtx)
+ (&VM_DOMAIN((n))->vmd_pagequeue_free_mtx)
#define vm_pagequeue_free_unlock(n) \
mtx_unlock(vm_pagequeue_free_lockptr((n)))
@@ -152,7 +158,7 @@ static inline struct vm_domain *
vm_pagequeue_domain(vm_page_t m)
{
- return (&vm_dom[vm_phys_domain(m)]);
+ return (VM_DOMAIN(vm_phys_domain(m)));
}
/*
@@ -170,22 +176,28 @@ vm_paging_target(struct vm_domain *vmd)
* Returns TRUE if the pagedaemon needs to be woken up.
*/
static inline int
-vm_paging_needed(int domain, u_int free_count)
+vm_paging_needed(struct vm_domain *vmd, u_int free_count)
{
- return (free_count < vm_dom[domain].vmd_pageout_wakeup_thresh);
+ return (free_count < vmd->vmd_pageout_wakeup_thresh);
}
+static inline int
+vm_paging_min(struct vm_domain *vmd)
+{
+
+ return (vmd->vmd_free_min > vmd->vmd_free_count);
+}
+
/*
* Return the number of pages we need to launder.
* A positive number indicates that we have a shortfall of clean pages.
*/
static inline int
-vm_laundry_target(void)
+vm_laundry_target(struct vm_domain *vmd)
{
- return (0);
- /* XXX return (vm_paging_target()); */
+ return (vm_paging_target(vmd));
}
#endif /* _KERNEL */
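A standalone model of the per-domain paging helpers added to vm_pagequeue.h above. vm_paging_target() itself is not quoted in the hunk, so the paging_target() body below (free target minus free count) is an assumption; the other helpers mirror the inline functions shown.

#include <stdio.h>

/* Trimmed-down stand-in for struct vm_domain: only the fields consulted by
 * the inline helpers above. */
struct domain {
    unsigned int free_count;
    unsigned int free_min;
    unsigned int free_target;
    unsigned int pageout_wakeup_thresh;
};

/*
 * Assumed definition of the paging target (free target minus free pages);
 * the real vm_paging_target() is context the hunk above does not quote.
 */
static int
paging_target(const struct domain *d)
{
    return ((int)d->free_target - (int)d->free_count);
}

/* Mirrors vm_paging_needed(): wake the page daemon below the threshold. */
static int
paging_needed(const struct domain *d, unsigned int free_count)
{
    return (free_count < d->pageout_wakeup_thresh);
}

/* Mirrors vm_paging_min(): true once free pages drop under the minimum. */
static int
paging_min(const struct domain *d)
{
    return (d->free_min > d->free_count);
}

/* Mirrors vm_laundry_target(): positive means a shortfall of clean pages. */
static int
laundry_target(const struct domain *d)
{
    return (paging_target(d));
}

int
main(void)
{
    struct domain d = { .free_count = 500, .free_min = 800,
        .free_target = 2000, .pageout_wakeup_thresh = 1000 };

    printf("needed=%d min=%d laundry target=%d\n",
        paging_needed(&d, d.free_count), paging_min(&d), laundry_target(&d));
    return (0);
}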
Modified: user/jeff/numa/sys/vm/vm_reserv.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_reserv.c Wed Jan 17 21:52:12 2018 (r328096)
+++ user/jeff/numa/sys/vm/vm_reserv.c Wed Jan 17 22:10:58 2018 (r328097)
@@ -166,19 +166,20 @@ popmap_is_set(popmap_t popmap[], int i)
*
* A partially populated reservation can be broken and reclaimed at any time.
*
- * The reservation structure is synchronized by the per-domain pagequeue_free
- * lock. The objq is synchronized by the vm_reserv_object lock.
+ * f - vm_pagequeue_free_lock
+ * o - vm_reserv_object_lock
+ * c - constant after boot
*/
struct vm_reserv {
- TAILQ_ENTRY(vm_reserv) partpopq;
- LIST_ENTRY(vm_reserv) objq;
- vm_object_t object; /* containing object */
- vm_pindex_t pindex; /* offset within object */
- vm_page_t pages; /* first page of a superpage */
- int domain; /* NUMA domain, constant. */
- int popcnt; /* # of pages in use */
- char inpartpopq;
- popmap_t popmap[NPOPMAP]; /* bit vector of used pages */
+ TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. */
+ LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */
+ vm_object_t object; /* (o, f) containing object */
+ vm_pindex_t pindex; /* (o, f) offset in object */
+ vm_page_t pages; /* (c) first page */
+ int domain; /* (c) NUMA domain. */
+ int popcnt; /* (f) # of pages in use */
+ char inpartpopq; /* (f) */
+ popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */
};
/*
@@ -239,8 +240,25 @@ static long vm_reserv_reclaimed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
&vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
-static struct mtx vm_reserv_object_mtx;
+/*
+ * The object lock pool is used to synchronize the rvq.  We cannot use a
+ * pool mutex because this lock is needed before malloc is available.
+ *
+ * The "hash" function could be made faster without divide and modulo.
+ */
+#define VM_RESERV_OBJ_LOCK_COUNT MAXCPU
+struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];
+
+#define vm_reserv_object_lock_idx(object) \
+ (((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT)
+#define vm_reserv_object_lock_ptr(object) \
+ &vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))]
+#define vm_reserv_object_lock(object) \
+ mtx_lock(vm_reserv_object_lock_ptr((object)))
+#define vm_reserv_object_unlock(object) \
+ mtx_unlock(vm_reserv_object_lock_ptr((object)))
+
static void vm_reserv_break(vm_reserv_t rv, vm_page_t m);
static void vm_reserv_depopulate(vm_reserv_t rv, int index);
static vm_reserv_t vm_reserv_from_page(vm_page_t m);
@@ -311,9 +329,6 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
return (error);
}
-#define vm_reserv_object_lock(object) mtx_lock(&vm_reserv_object_mtx)
-#define vm_reserv_object_unlock(object) mtx_unlock(&vm_reserv_object_mtx)
-
/*
* Remove a reservation from the object's objq.
*/
@@ -350,8 +365,8 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, v
for (i = 0; i < NPOPMAP; i++)
KASSERT(rv->popmap[i] == 0,
("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
- rv->pindex = pindex;
vm_reserv_object_lock(object);
+ rv->pindex = pindex;
rv->object = object;
LIST_INSERT_HEAD(&object->rvq, rv, objq);
vm_reserv_object_unlock(object);
@@ -655,29 +670,36 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
* Could at least one reservation fit between the first index to the
* left that can be used ("leftcap") and the first index to the right
* that cannot be used ("rightcap")?
+ *
+ * We must synchronize with the reserv object lock to protect the
+ * pindex/object of the resulting reservations against rename while
+ * we are inspecting.
*/
first = pindex - VM_RESERV_INDEX(object, pindex);
+ minpages = VM_RESERV_INDEX(object, pindex) + npages;
+ maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
+ allocpages = maxpages;
+ vm_reserv_object_lock(object);
if (mpred != NULL) {
- /* XXX unlocked rv access */
if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1;
else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
- if (leftcap > first)
+ if (leftcap > first) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
- minpages = VM_RESERV_INDEX(object, pindex) + npages;
- maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
- allocpages = maxpages;
if (msucc != NULL) {
- /* XXX unlocked rv access */
if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex;
else
rightcap = rv->pindex;
if (first + maxpages > rightcap) {
- if (maxpages == VM_LEVEL_0_NPAGES)
+ if (maxpages == VM_LEVEL_0_NPAGES) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
/*
* At least one reservation will fit between "leftcap"
@@ -688,6 +710,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
allocpages = minpages;
}
}
+ vm_reserv_object_unlock(object);
/*
* Would the last new reservation extend past the end of the object?
@@ -800,7 +823,7 @@ vm_reserv_extend(int req, vm_object_t object, vm_pinde
free_count = vm_pagequeue_freecnt_adj(domain, -1);
vm_pagequeue_free_unlock(domain);
- if (vm_paging_needed(domain, free_count))
+ if (vm_paging_needed(VM_DOMAIN(domain), free_count))
pagedaemon_wakeup(domain);
return (m);
@@ -845,26 +868,34 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
/*
* Could a reservation fit between the first index to the left that
* can be used and the first index to the right that cannot be used?
+ *
+ * We must synchronize with the reserv object lock to protect the
+ * pindex/object of the resulting reservations against rename while
+ * we are inspecting.
*/
first = pindex - VM_RESERV_INDEX(object, pindex);
+ vm_reserv_object_lock(object);
if (mpred != NULL) {
- /* XXX unlocked rv access */
if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1;
else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
- if (leftcap > first)
+ if (leftcap > first) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
if (msucc != NULL) {
- /* XXX unlocked rv access */
if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex;
else
rightcap = rv->pindex;
- if (first + VM_LEVEL_0_NPAGES > rightcap)
+ if (first + VM_LEVEL_0_NPAGES > rightcap) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
+ vm_reserv_object_unlock(object);
/*
* Would a new reservation extend past the end of the object?
@@ -1250,18 +1281,15 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object,
if (rv->object == old_object) {
vm_pagequeue_free_lock(rv->domain);
if (rv->object == old_object) {
- /*
- * XXX Do we need to synchronize them simultaneously?
- * or does the pagequeue_free lock protect enough?
- */
vm_reserv_object_lock(old_object);
+ rv->object = NULL;
LIST_REMOVE(rv, objq);
vm_reserv_object_unlock(old_object);
vm_reserv_object_lock(new_object);
rv->object = new_object;
+ rv->pindex -= old_object_offset;
LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
vm_reserv_object_unlock(new_object);
- rv->pindex -= old_object_offset;
}
vm_pagequeue_free_unlock(rv->domain);
}
@@ -1293,6 +1321,7 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
{
vm_paddr_t new_end;
size_t size;
+ int i;
/*
* Calculate the size (in bytes) of the reservation array. Round up
@@ -1312,7 +1341,9 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
VM_PROT_READ | VM_PROT_WRITE);
bzero(vm_reserv_array, size);
- mtx_init(&vm_reserv_object_mtx, "resv obj lock", NULL, MTX_DEF);
+ for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
+ mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
+ MTX_DEF);
/*
* Return the next available physical address.