mmap MAP_NOSYNC regression in 10.x
Konstantin Belousov
kostikbel at gmail.com
Fri Sep 5 12:38:39 UTC 2014
On Fri, Sep 05, 2014 at 01:56:40PM +0200, Pieter de Goeje wrote:
> Thanks, works for me!
I realized that the patch contains yet another bug. The oflags page
flags update is protected by the exclusive vm object lock, which is only
held in shared mode on the fast path. Below is the fixed patch, where
I take the page lock around setting VPO_NOSYNC (exclusive lock owners
cannot race with fast path since we own shared lock, and two parallel
fast path execution would be handled by the page lock).
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 30b0456..f327ab0 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -174,6 +174,54 @@ unlock_and_deallocate(struct faultstate *fs)
}
}
+static void
+vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
+ vm_prot_t fault_type, int fault_flags, boolean_t set_wd)
+{
+
+ if (((prot & VM_PROT_WRITE) != 0 ||
+ (fault_flags & VM_FAULT_DIRTY) != 0) &&
+ (m->oflags & VPO_UNMANAGED) == 0) {
+ if (set_wd)
+ vm_object_set_writeable_dirty(m->object);
+
+ /*
+ * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
+ * if the page is already dirty to prevent data written with
+ * the expectation of being synced from not being synced.
+ * Likewise if this entry does not request NOSYNC then make
+ * sure the page isn't marked NOSYNC. Applications sharing
+ * data should use the same flags to avoid ping ponging.
+ */
+ if (entry->eflags & MAP_ENTRY_NOSYNC) {
+ if (m->dirty == 0) {
+ if (!set_wd)
+ vm_page_lock(m);
+ m->oflags |= VPO_NOSYNC;
+ if (!set_wd)
+ vm_page_unlock(m);
+ }
+ } else {
+ m->oflags &= ~VPO_NOSYNC;
+ }
+
+ /*
+ * If the fault is a write, we know that this page is being
+ * written NOW so dirty it explicitly to save on
+ * pmap_is_modified() calls later.
+ *
+ * Also tell the backing pager, if any, that it should remove
+ * any swap backing since the page is now dirty.
+ */
+ if (((fault_type & VM_PROT_WRITE) != 0 &&
+ (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
+ (fault_flags & VM_FAULT_DIRTY) != 0) {
+ vm_page_dirty(m);
+ vm_pager_page_unswapped(m);
+ }
+ }
+}
+
/*
* TRYPAGER - used by vm_fault to calculate whether the pager for the
* current object *might* contain the page.
@@ -321,11 +369,8 @@ RetryFault:;
vm_page_hold(m);
vm_page_unlock(m);
}
- if ((fault_type & VM_PROT_WRITE) != 0 &&
- (m->oflags & VPO_UNMANAGED) == 0) {
- vm_page_dirty(m);
- vm_pager_page_unswapped(m);
- }
+ vm_fault_dirty(fs.entry, m, prot, fault_type, fault_flags,
+ FALSE);
VM_OBJECT_RUNLOCK(fs.first_object);
if (!wired)
vm_fault_prefault(&fs, vaddr, 0, 0);
@@ -898,42 +943,7 @@ vnode_locked:
if (hardfault)
fs.entry->next_read = fs.pindex + faultcount - reqpage;
- if (((prot & VM_PROT_WRITE) != 0 ||
- (fault_flags & VM_FAULT_DIRTY) != 0) &&
- (fs.m->oflags & VPO_UNMANAGED) == 0) {
- vm_object_set_writeable_dirty(fs.object);
-
- /*
- * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
- * if the page is already dirty to prevent data written with
- * the expectation of being synced from not being synced.
- * Likewise if this entry does not request NOSYNC then make
- * sure the page isn't marked NOSYNC. Applications sharing
- * data should use the same flags to avoid ping ponging.
- */
- if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
- if (fs.m->dirty == 0)
- fs.m->oflags |= VPO_NOSYNC;
- } else {
- fs.m->oflags &= ~VPO_NOSYNC;
- }
-
- /*
- * If the fault is a write, we know that this page is being
- * written NOW so dirty it explicitly to save on
- * pmap_is_modified() calls later.
- *
- * Also tell the backing pager, if any, that it should remove
- * any swap backing since the page is now dirty.
- */
- if (((fault_type & VM_PROT_WRITE) != 0 &&
- (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
- (fault_flags & VM_FAULT_DIRTY) != 0) {
- vm_page_dirty(fs.m);
- vm_pager_page_unswapped(fs.m);
- }
- }
-
+ vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE);
vm_page_assert_xbusied(fs.m);
/*
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index f12b76c..a45648d 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -147,7 +147,7 @@ struct vm_page {
uint16_t hold_count; /* page hold count (P) */
uint16_t flags; /* page PG_* flags (P) */
uint8_t aflags; /* access is atomic */
- uint8_t oflags; /* page VPO_* flags (O) */
+ uint8_t oflags; /* page VPO_* flags (OM) */
uint8_t queue; /* page queue index (P,Q) */
int8_t psind; /* pagesizes[] index (O) */
int8_t segind;
@@ -163,8 +163,9 @@ struct vm_page {
/*
* Page flags stored in oflags:
*
- * Access to these page flags is synchronized by the lock on the object
- * containing the page (O).
+ * Access to these page flags is synchronized by the exclusive lock on
+ * the object containing the page, or combination of shared object
+ * lock and the page lock (OM).
*
* Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG)
* indicates that the page is not under PV management but
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-hackers/attachments/20140905/8077f928/attachment.sig>
More information about the freebsd-hackers
mailing list