svn commit: r231027 - in user/attilio/vmcontention/sys: kern vm
Attilio Rao
attilio at FreeBSD.org
Sun Feb 5 17:37:27 UTC 2012
Author: attilio
Date: Sun Feb 5 17:37:26 2012
New Revision: 231027
URL: http://svn.freebsd.org/changeset/base/231027
Log:
Remove the panic from vm_radix_insert() and propagate the error to the
callers of vm_page_insert().
The default action for every caller is to unwind-back the operation
besides vm_page_rename() where this has proven to be impossible to do.
For that case, it just spins until the insertion succeeds (i.e. until memory
for the radix nodes becomes available). However, since vm_page_rename() is
invoked rarely (and this panic has never been hit in the past), this is
thought to be a very seldom event and not a meaningful performance factor.
The patch has been tested with an atomic counter returning NULL from
the zone allocator every 1/100000 allocations. Via printf instrumentation,
I've verified that a typical buildkernel could trigger this about 30 times.
The patch survived 2 hours of repeated buildkernel/buildworld runs.
Several technical notes:
- The vm_page_insert() call is moved, in several callers, closer to the
failure points. This could be committed separately, before vmcontention
hits the tree, just to verify that -CURRENT is happy with it.
- vm_page_rename() no longer needs the page lock to be held by the callers,
as it hides that as an implementation detail. Do the locking internally.
- now vm_page_insert() returns an int, with 0 meaning everything was ok,
thus KPI is broken by this patch.
Modified:
user/attilio/vmcontention/sys/kern/subr_uio.c
user/attilio/vmcontention/sys/vm/device_pager.c
user/attilio/vmcontention/sys/vm/sg_pager.c
user/attilio/vmcontention/sys/vm/vm_fault.c
user/attilio/vmcontention/sys/vm/vm_object.c
user/attilio/vmcontention/sys/vm/vm_page.c
user/attilio/vmcontention/sys/vm/vm_page.h
user/attilio/vmcontention/sys/vm/vm_radix.c
Modified: user/attilio/vmcontention/sys/kern/subr_uio.c
==============================================================================
--- user/attilio/vmcontention/sys/kern/subr_uio.c Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/kern/subr_uio.c Sun Feb 5 17:37:26 2012 (r231027)
@@ -85,6 +85,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
vm_map_entry_t entry;
vm_pindex_t upindex;
vm_prot_t prot;
+ vm_page_bits_t vbits;
boolean_t wired;
KASSERT((uaddr & PAGE_MASK) == 0,
@@ -95,6 +96,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
* unwired in sf_buf_mext().
*/
kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
+ vbits = kern_pg->valid;
kern_pg->valid = VM_PAGE_BITS_ALL;
KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
("vm_pgmoveco: kern_pg is not correctly wired"));
@@ -105,6 +107,13 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
return(EFAULT);
}
VM_OBJECT_LOCK(uobject);
+ if (vm_page_insert(kern_pg, uobject, upindex) != 0) {
+ kern_pg->valid = vbits;
+ VM_OBJECT_UNLOCK(uobject);
+ vm_map_lookup_done(map, entry);
+ return(ENOMEM);
+ }
+ vm_page_dirty(kern_pg);
retry:
if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
@@ -122,8 +131,6 @@ retry:
if (uobject->backing_object != NULL)
pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
}
- vm_page_insert(kern_pg, uobject, upindex);
- vm_page_dirty(kern_pg);
VM_OBJECT_UNLOCK(uobject);
vm_map_lookup_done(map, entry);
return(KERN_SUCCESS);
Modified: user/attilio/vmcontention/sys/vm/device_pager.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/device_pager.c Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/vm/device_pager.c Sun Feb 5 17:37:26 2012 (r231027)
@@ -307,11 +307,14 @@ old_dev_pager_fault(vm_object_t object,
*/
page = vm_page_getfake(paddr, memattr);
VM_OBJECT_LOCK(object);
+ if (vm_page_insert(page, object, offset) != 0) {
+ vm_page_putfake(page);
+ return (VM_PAGER_FAIL);
+ }
vm_page_lock(*mres);
vm_page_free(*mres);
vm_page_unlock(*mres);
*mres = page;
- vm_page_insert(page, object, pidx);
}
page->valid = VM_PAGE_BITS_ALL;
return (VM_PAGER_OK);
Modified: user/attilio/vmcontention/sys/vm/sg_pager.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/sg_pager.c Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/vm/sg_pager.c Sun Feb 5 17:37:26 2012 (r231027)
@@ -179,6 +179,10 @@ sg_pager_getpages(vm_object_t object, vm
/* Construct a new fake page. */
page = vm_page_getfake(paddr, memattr);
VM_OBJECT_LOCK(object);
+ if (vm_page_insert(page, object, offset) != 0) {
+ vm_page_putfake(page);
+ return (VM_PAGER_FAIL);
+ }
TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq);
/* Free the original pages and insert this fake page into the object. */
@@ -187,7 +191,6 @@ sg_pager_getpages(vm_object_t object, vm
vm_page_free(m[i]);
vm_page_unlock(m[i]);
}
- vm_page_insert(page, object, offset);
m[reqpage] = page;
page->valid = VM_PAGE_BITS_ALL;
Modified: user/attilio/vmcontention/sys/vm/vm_fault.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_fault.c Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/vm/vm_fault.c Sun Feb 5 17:37:26 2012 (r231027)
@@ -769,9 +769,7 @@ vnode_locked:
* process'es object. The page is
* automatically made dirty.
*/
- vm_page_lock(fs.m);
vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
- vm_page_unlock(fs.m);
vm_page_busy(fs.m);
fs.first_m = fs.m;
fs.m = NULL;
Modified: user/attilio/vmcontention/sys/vm/vm_object.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_object.c Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/vm/vm_object.c Sun Feb 5 17:37:26 2012 (r231027)
@@ -1397,9 +1397,7 @@ retry:
vm_reserv_rename(m, new_object, orig_object,
offidxstart);
#endif
- vm_page_lock(m);
vm_page_rename(m, new_object, idx);
- vm_page_unlock(m);
/*
* page automatically made dirty by rename and
* cache handled
@@ -1654,9 +1652,7 @@ restart:
* If the page was mapped to a process, it can remain
* mapped through the rename.
*/
- vm_page_lock(p);
vm_page_rename(p, object, new_pindex);
- vm_page_unlock(p);
/* page automatically made dirty by rename */
}
}
Modified: user/attilio/vmcontention/sys/vm/vm_page.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_page.c Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/vm/vm_page.c Sun Feb 5 17:37:26 2012 (r231027)
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <vm/uma_int.h>
+#include <machine/cpu.h>
#include <machine/md_var.h>
/*
@@ -777,13 +778,15 @@ vm_page_dirty(vm_page_t m)
* The object and page must be locked.
* This routine may not block.
*/
-void
+int
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
vm_page_t neighbor;
+ vm_pindex_t cpindex;
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
if (m->object != NULL)
panic("vm_page_insert: page already inserted");
+ cpindex = m->pindex;
/*
* Record the object/offset pair in this page
@@ -804,8 +807,13 @@ vm_page_insert(vm_page_t m, vm_object_t
} else
TAILQ_INSERT_TAIL(&object->memq, m, listq);
}
- if (vm_radix_insert(&object->rtree, pindex, m) != 0)
- panic("vm_page_insert: unable to insert the new page");
+
+ if (vm_radix_insert(&object->rtree, pindex, m) != 0) {
+ TAILQ_REMOVE(&object->memq, m, listq);
+ m->object = NULL;
+ m->pindex = cpindex;
+ return (ENOMEM);
+ }
/*
* show that the object has one more resident page.
@@ -823,6 +831,7 @@ vm_page_insert(vm_page_t m, vm_object_t
*/
if (m->aflags & PGA_WRITEABLE)
vm_object_set_writeable_dirty(object);
+ return (0);
}
/*
@@ -967,9 +976,20 @@ vm_page_prev(vm_page_t m)
void
vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
{
+ u_int i;
+
+ MPASS(m->object != NULL);
+ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+ VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
+ vm_page_lock(m);
vm_page_remove(m);
- vm_page_insert(m, new_object, new_pindex);
+ vm_page_unlock(m);
+ while (vm_page_insert(m, new_object, new_pindex) != 0) {
+ pagedaemon_wakeup();
+ for (i = 0; i < 10000000; i++)
+ cpu_spinwait();
+ }
vm_page_dirty(m);
}
@@ -1250,7 +1270,19 @@ vm_page_alloc(vm_object_t object, vm_pin
if (object->memattr != VM_MEMATTR_DEFAULT &&
object->type != OBJT_DEVICE && object->type != OBJT_SG)
pmap_page_set_memattr(m, object->memattr);
- vm_page_insert(m, object, pindex);
+ if (vm_page_insert(m, object, pindex) != 0) {
+
+ /* See the comment below about hold count handling. */
+ if (vp != NULL)
+ vdrop(vp);
+ vm_page_lock(m);
+ if (req & VM_ALLOC_WIRED)
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
+ vm_page_unlock(m);
+ pagedaemon_wakeup();
+ return (NULL);
+ }
} else
m->pindex = pindex;
@@ -1317,6 +1349,7 @@ vm_page_alloc_contig(vm_object_t object,
{
struct vnode *drop;
vm_page_t deferred_vdrop_list, m, m_ret;
+ vm_pindex_t cpindex;
u_int flags, oflags;
int req_class;
@@ -1403,6 +1436,7 @@ retry:
memattr == VM_MEMATTR_DEFAULT)
memattr = object->memattr;
}
+ cpindex = pindex;
for (m = m_ret; m < &m_ret[npages]; m++) {
m->aflags = 0;
m->flags &= flags;
@@ -1412,12 +1446,30 @@ retry:
m->oflags = oflags;
if (memattr != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, memattr);
- if (object != NULL)
- vm_page_insert(m, object, pindex);
- else
- m->pindex = pindex;
pindex++;
}
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ if (object != NULL) {
+ if (vm_page_insert(m, object, cpindex) != 0) {
+ while (deferred_vdrop_list != NULL) {
+ vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
+ deferred_vdrop_list =
+ deferred_vdrop_list->pageq.tqe_next;
+ }
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ vm_page_lock(m);
+ if (req & VM_ALLOC_WIRED)
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
+ vm_page_unlock(m);
+ }
+ pagedaemon_wakeup();
+ return (NULL);
+ }
+ } else
+ m->pindex = cpindex;
+ cpindex++;
+ }
while (deferred_vdrop_list != NULL) {
vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
@@ -2642,11 +2694,8 @@ vm_page_cowfault(vm_page_t m)
pindex = m->pindex;
retry_alloc:
- pmap_remove_all(m);
- vm_page_remove(m);
mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
if (mnew == NULL) {
- vm_page_insert(m, object, pindex);
vm_page_unlock(m);
VM_OBJECT_UNLOCK(object);
VM_WAIT;
@@ -2672,8 +2721,9 @@ vm_page_cowfault(vm_page_t m)
vm_page_lock(mnew);
vm_page_free(mnew);
vm_page_unlock(mnew);
- vm_page_insert(m, object, pindex);
} else { /* clear COW & copy page */
+ pmap_remove_all(m);
+ vm_page_remove(m);
if (!so_zerocp_fullpage)
pmap_copy_page(m, mnew);
mnew->valid = VM_PAGE_BITS_ALL;
Modified: user/attilio/vmcontention/sys/vm/vm_page.h
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_page.h Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/vm/vm_page.h Sun Feb 5 17:37:26 2012 (r231027)
@@ -386,7 +386,7 @@ void vm_page_dontneed(vm_page_t);
void vm_page_deactivate (vm_page_t);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
+int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
vm_page_t vm_page_next(vm_page_t m);
int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
Modified: user/attilio/vmcontention/sys/vm/vm_radix.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_radix.c Sun Feb 5 16:54:26 2012 (r231026)
+++ user/attilio/vmcontention/sys/vm/vm_radix.c Sun Feb 5 17:37:26 2012 (r231027)
@@ -55,6 +55,7 @@
#include <sys/kdb.h>
CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
+CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);
static uma_zone_t vm_radix_node_zone;
@@ -211,6 +212,24 @@ vm_radix_setroot(struct vm_radix *rtree,
rtree->rt_root = root;
}
+static inline void
+vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
+ struct vm_radix_node *iroot, int ilevel)
+{
+ struct vm_radix_node *rnode;
+
+ CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
+ rtree, root, iroot, ilevel);
+ while (iroot != root && root != NULL) {
+ rnode = root;
+ MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
+ rnode->rn_count = 0;
+ root = rnode->rn_child[0];
+ vm_radix_node_put(rnode);
+ }
+ vm_radix_setroot(rtree, iroot, ilevel);
+}
+
static inline void *
vm_radix_match(void *child, int color)
{
@@ -262,10 +281,9 @@ vm_radix_reclaim_allnodes_internal(struc
int
vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
{
- struct vm_radix_node *rnode;
- struct vm_radix_node *root;
- int level;
- int slot;
+ struct vm_radix_node *iroot, *rnode, *root;
+ u_int allocmsk;
+ int clev, ilevel, level, slot;
CTR3(KTR_VM,
"insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val);
@@ -276,6 +294,8 @@ vm_radix_insert(struct vm_radix *rtree,
* Increase the height by adding nodes at the root until
* there is sufficient space.
*/
+ ilevel = level;
+ iroot = root;
while (level == 0 || index > VM_RADIX_MAX(level)) {
CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
(uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
@@ -292,6 +312,8 @@ vm_radix_insert(struct vm_radix *rtree,
CTR4(KTR_VM,
"insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
rtree, root, (uintmax_t)index, level);
+ vm_radix_unwind_heightup(rtree, root, iroot,
+ ilevel);
return (ENOMEM);
}
/*
@@ -309,6 +331,8 @@ vm_radix_insert(struct vm_radix *rtree,
}
/* Now that the tree is tall enough, fill in the path to the index. */
+ allocmsk = 0;
+ clev = level;
rnode = root;
for (level = level - 1; level > 0; level--) {
slot = vm_radix_slot(index, level);
@@ -324,9 +348,35 @@ vm_radix_insert(struct vm_radix *rtree,
"insert: tree %p, rnode %p, child %p, count %u ENOMEM",
rtree, rnode, rnode->rn_child[slot],
rnode->rn_count);
+ MPASS(level != clev || allocmsk == 0);
+ while (allocmsk != 0) {
+ rnode = root;
+ level = clev;
+ level--;
+ CTR4(KTR_VM,
+ "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
+ root, level, slot, allocmsk);
+ slot = vm_radix_slot(index, level);
+ MPASS(level >= (ffs(allocmsk) - 1));
+ while (level > (ffs(allocmsk) - 1)) {
+ MPASS(level > 0);
+ slot = vm_radix_slot(index,
+ level);
+ rnode = rnode->rn_child[slot];
+ level--;
+ }
+ MPASS((allocmsk & (1 << level)) != 0);
+ allocmsk &= ~(1 << level);
+ rnode->rn_count--;
+ vm_radix_node_put(rnode->rn_child[slot]);
+ rnode->rn_child[slot] = NULL;
+ }
+ vm_radix_unwind_heightup(rtree, root, iroot,
+ ilevel);
return (ENOMEM);
}
rnode->rn_count++;
+ allocmsk |= (1 << level);
}
CTR5(KTR_VM,
"insert: tree %p, index %ju, level %d, slot %d, rnode %p",
More information about the svn-src-user
mailing list