git: fdb1dbb1cc06 - main - vm: read-locked fault handling for backing objects
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 11 Mar 2023 11:08:43 UTC
The branch main has been updated by mjg: URL: https://cgit.FreeBSD.org/src/commit/?id=fdb1dbb1cc0608edd54451050fa56b84a303c8a6 commit fdb1dbb1cc0608edd54451050fa56b84a303c8a6 Author: Mateusz Guzik <mjg@FreeBSD.org> AuthorDate: 2023-03-07 20:56:54 +0000 Commit: Mateusz Guzik <mjg@FreeBSD.org> CommitDate: 2023-03-11 11:08:21 +0000 vm: read-locked fault handling for backing objects This is almost the simplest patch which manages to avoid write locking for backing objects, as a result mostly fixing vm object contention problems. What is not fixed: 1. cacheline ping pong due to read-locks 2. cacheline ping pong due to pip 3. cacheline ping pong due to object busying 4. write locking on first object On top of it the use of VM_OBJECT_UNLOCK instead of explicitly tracking the state is slower multithreaded than it needs to be, done for simplicity for the time being. Sample lock profiling results doing -j 104 buildkernel on tmpfs: before: 71446200 (rw:vmobject) 14689706 (sx:vm map (user)) 4166251 (rw:pmap pv list) 2799924 (spin mutex:turnstile chain) after: 19940411 (rw:vmobject) 8166012 (rw:pmap pv list) 6017608 (sx:vm map (user)) 1151416 (sleep mutex:pipe mutex) Reviewed by: kib Reviewed by: markj Tested by: pho Differential Revision: https://reviews.freebsd.org/D38964 --- sys/vm/vm_fault.c | 81 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 2afe5a19d2d7..5df667052615 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -132,6 +132,7 @@ struct faultstate { struct timeval oom_start_time; bool oom_started; int nera; + bool can_read_lock; /* Page reference for cow. */ vm_page_t m_cow; @@ -170,6 +171,12 @@ enum fault_status { FAULT_PROTECTION_FAILURE, /* Invalid access. 
*/ }; +enum fault_next_status { + FAULT_NEXT_GOTOBJ = 1, + FAULT_NEXT_NOOBJ, + FAULT_NEXT_RESTART, +}; + static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead); static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, @@ -278,7 +285,7 @@ static void unlock_and_deallocate(struct faultstate *fs) { - VM_OBJECT_WUNLOCK(fs->object); + VM_OBJECT_UNLOCK(fs->object); fault_deallocate(fs); } @@ -736,6 +743,26 @@ vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, return (result); } +static bool +vm_fault_object_ensure_wlocked(struct faultstate *fs) +{ + if (fs->object == fs->first_object) + VM_OBJECT_ASSERT_WLOCKED(fs->object); + + if (!fs->can_read_lock) { + VM_OBJECT_ASSERT_WLOCKED(fs->object); + return (true); + } + + if (VM_OBJECT_WOWNED(fs->object)) + return (true); + + if (VM_OBJECT_TRYUPGRADE(fs->object)) + return (true); + + return (false); +} + static enum fault_status vm_fault_lock_vnode(struct faultstate *fs, bool objlocked) { @@ -1042,12 +1069,15 @@ vm_fault_cow(struct faultstate *fs) curthread->td_cow++; } -static bool +static enum fault_next_status vm_fault_next(struct faultstate *fs) { vm_object_t next_object; - VM_OBJECT_ASSERT_WLOCKED(fs->object); + if (fs->object == fs->first_object || !fs->can_read_lock) + VM_OBJECT_ASSERT_WLOCKED(fs->object); + else + VM_OBJECT_ASSERT_LOCKED(fs->object); /* * The requested page does not exist at this object/ @@ -1062,8 +1092,14 @@ vm_fault_next(struct faultstate *fs) if (fs->object == fs->first_object) { fs->first_m = fs->m; fs->m = NULL; - } else + } else { + if (!vm_fault_object_ensure_wlocked(fs)) { + fs->can_read_lock = false; + unlock_and_deallocate(fs); + return (FAULT_NEXT_RESTART); + } fault_page_free(&fs->m); + } /* * Move on to the next object. 
Lock the next object before @@ -1071,18 +1107,21 @@ vm_fault_next(struct faultstate *fs) */ next_object = fs->object->backing_object; if (next_object == NULL) - return (false); + return (FAULT_NEXT_NOOBJ); MPASS(fs->first_m != NULL); KASSERT(fs->object != next_object, ("object loop %p", next_object)); - VM_OBJECT_WLOCK(next_object); + if (fs->can_read_lock) + VM_OBJECT_RLOCK(next_object); + else + VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs->object != fs->first_object) vm_object_pip_wakeup(fs->object); fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset); - VM_OBJECT_WUNLOCK(fs->object); + VM_OBJECT_UNLOCK(fs->object); fs->object = next_object; - return (true); + return (FAULT_NEXT_GOTOBJ); } static void @@ -1364,7 +1403,7 @@ vm_fault_busy_sleep(struct faultstate *fs) unlock_map(fs); if (fs->m != vm_page_lookup(fs->object, fs->pindex) || !vm_page_busy_sleep(fs->m, "vmpfw", 0)) - VM_OBJECT_WUNLOCK(fs->object); + VM_OBJECT_UNLOCK(fs->object); VM_CNT_INC(v_intrans); vm_object_deallocate(fs->first_object); } @@ -1383,7 +1422,10 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) enum fault_status res; bool dead; - VM_OBJECT_ASSERT_WLOCKED(fs->object); + if (fs->object == fs->first_object || !fs->can_read_lock) + VM_OBJECT_ASSERT_WLOCKED(fs->object); + else + VM_OBJECT_ASSERT_LOCKED(fs->object); /* * If the object is marked for imminent termination, we retry @@ -1415,7 +1457,7 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) * done. 
*/ if (vm_page_all_valid(fs->m)) { - VM_OBJECT_WUNLOCK(fs->object); + VM_OBJECT_UNLOCK(fs->object); return (FAULT_SOFT); } } @@ -1427,6 +1469,11 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) */ if (fs->m == NULL && (fault_object_needs_getpages(fs->object) || fs->object == fs->first_object)) { + if (!vm_fault_object_ensure_wlocked(fs)) { + fs->can_read_lock = false; + unlock_and_deallocate(fs); + return (FAULT_RESTART); + } res = vm_fault_allocate(fs); if (res != FAULT_CONTINUE) return (res); @@ -1448,7 +1495,7 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) * prevents simultaneous faults and collapses while * the object lock is dropped. */ - VM_OBJECT_WUNLOCK(fs->object); + VM_OBJECT_UNLOCK(fs->object); res = vm_fault_getpages(fs, behindp, aheadp); if (res == FAULT_CONTINUE) VM_OBJECT_WLOCK(fs->object); @@ -1465,6 +1512,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, struct faultstate fs; int ahead, behind, faultcount, rv; enum fault_status res; + enum fault_next_status res_next; bool hardfault; VM_CNT_INC(v_vm_faults); @@ -1480,6 +1528,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, fs.lookup_still_valid = false; fs.oom_started = false; fs.nera = -1; + fs.can_read_lock = true; faultcount = 0; hardfault = false; @@ -1590,15 +1639,19 @@ RetryFault: * traverse into a backing object or zero fill if none is * found. */ - if (vm_fault_next(&fs)) + res_next = vm_fault_next(&fs); + if (res_next == FAULT_NEXT_RESTART) + goto RetryFault; + else if (res_next == FAULT_NEXT_GOTOBJ) continue; + MPASS(res_next == FAULT_NEXT_NOOBJ); if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) { if (fs.first_object == fs.object) fault_page_free(&fs.first_m); unlock_and_deallocate(&fs); return (KERN_OUT_OF_BOUNDS); } - VM_OBJECT_WUNLOCK(fs.object); + VM_OBJECT_UNLOCK(fs.object); vm_fault_zerofill(&fs); /* Don't try to prefault neighboring pages. */ faultcount = 1;