git: 75694e650ed9 - main - swap_pager: speedup meta_transfer

From: Doug Moore <dougm_at_FreeBSD.org>
Date: Sun, 07 Jul 2024 23:21:45 UTC
The branch main has been updated by dougm:

URL: https://cgit.FreeBSD.org/src/commit/?id=75694e650ed99e43cf8418629ae7a3a596ef4f5d

commit 75694e650ed99e43cf8418629ae7a3a596ef4f5d
Author:     Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2024-07-07 23:19:22 +0000
Commit:     Doug Moore <dougm@FreeBSD.org>
CommitDate: 2024-07-07 23:19:22 +0000

    swap_pager: speedup meta_transfer
    
    Add a parameter to swp_pager_meta_build, for the benefit of
    swp_pager_meta_transfer.
    
    swp_pager_meta_transfer calls swp_pager_xfer_source, which may look up
    the same trie entry twice - first, by calling swp_pager_meta_lookup,
    and then as the first step in swp_pager_meta_build. A boolean
    parameter to swp_pager_meta_build tells that function not to replace a
    previously assigned swapblk with a new one, and setting it in this
    call makes the first meta_lookup call unnecessary.
    
    swp_pager_meta_transfer calls swp_pager_xfer_source, which may release
    and reacquire the source object write lock, because the call to
    swp_pager_meta_build may acquire and then release the destination
    object write lock. But it probably doesn't, so fiddling with the
    source object write lock was probably unnecessary. This boolean
    parameter to swp_pager_meta_build tells it to return immediately if
    memory allocation problems are about to require a lock
    release/reacquisition, so that the caller can release/reacquire the
    source object write lock only if truly necessary, around a second call
    to swp_pager_meta_build with that boolean parameter not set. This
    should make manipulation of the source object write lock rarer.
    
    Reviewed by:    alc, kib (previous version)
    Tested by:      pho
    Differential Revision:  https://reviews.freebsd.org/D45781
---
 sys/vm/swap_pager.c | 94 ++++++++++++++++++++++++++---------------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 79986842d814..9df1521858c1 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -486,7 +486,8 @@ static daddr_t	swp_pager_getswapspace(int *npages);
 /*
  * Metadata functions
  */
-static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t);
+static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t,
+	bool);
 static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t,
     vm_size_t *);
 static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst,
@@ -1031,7 +1032,7 @@ swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_pindex_t size)
 		}
 		for (j = 0; j < n; ++j) {
 			addr = swp_pager_meta_build(object,
-			    start + i + j, blk + j);
+			    start + i + j, blk + j, false);
 			if (addr != SWAPBLK_NONE)
 				swp_pager_update_freerange(&range, addr);
 		}
@@ -1041,35 +1042,6 @@ swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_pindex_t size)
 	return (0);
 }
 
-static bool
-swp_pager_xfer_source(vm_object_t srcobject, vm_object_t dstobject,
-    vm_pindex_t pindex, daddr_t addr)
-{
-	daddr_t dstaddr __diagused;
-
-	KASSERT((srcobject->flags & OBJ_SWAP) != 0,
-	    ("%s: srcobject not swappable", __func__));
-	KASSERT((dstobject->flags & OBJ_SWAP) != 0,
-	    ("%s: dstobject not swappable", __func__));
-
-	if (swp_pager_meta_lookup(dstobject, pindex) != SWAPBLK_NONE) {
-		/* Caller should destroy the source block. */
-		return (false);
-	}
-
-	/*
-	 * Destination has no swapblk and is not resident, transfer source.
-	 * swp_pager_meta_build() can sleep.
-	 */
-	VM_OBJECT_WUNLOCK(srcobject);
-	dstaddr = swp_pager_meta_build(dstobject, pindex, addr);
-	KASSERT(dstaddr == SWAPBLK_NONE,
-	    ("Unexpected destination swapblk"));
-	VM_OBJECT_WLOCK(srcobject);
-
-	return (true);
-}
-
 /*
  * SWAP_PAGER_COPY() -  copy blocks from source pager to destination pager
  *			and destroy the source.
@@ -1548,7 +1520,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count,
 			mreq = ma[i + j];
 			vm_page_aflag_clear(mreq, PGA_SWAP_FREE);
 			addr = swp_pager_meta_build(mreq->object, mreq->pindex,
-			    blk + j);
+			    blk + j, false);
 			if (addr != SWAPBLK_NONE)
 				swp_pager_update_freerange(&range, addr);
 			MPASS(mreq->dirty == VM_PAGE_BITS_ALL);
@@ -2052,16 +2024,22 @@ swp_pager_free_empty_swblk(vm_object_t object, struct swblk *sb)
 		uma_zfree(swblk_zone, sb);
 	}
 }
-   
+
 /*
  * SWP_PAGER_META_BUILD() -	add swap block to swap meta data for object
  *
- *	The specified swapblk is added to the object's swap metadata.  If
- *	the swapblk is not valid, it is freed instead.  Any previously
- *	assigned swapblk is returned.
+ *	Try to add the specified swapblk to the object's swap metadata.  If
+ *	nowait_noreplace is set, add the specified swapblk only if there is no
+ *	previously assigned swapblk at pindex.  If the swapblk is invalid, and
+ *	replaces a valid swapblk, empty swap metadata is freed.  If memory
+ *	allocation fails, and nowait_noreplace is set, return the specified
+ *	swapblk immediately to indicate failure; otherwise, wait and retry until
+ *	memory allocation succeeds.  Return the previously assigned swapblk, if
+ *	any.
  */
 static daddr_t
-swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
+swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk,
+    bool nowait_noreplace)
 {
 	static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted;
 	struct swblk *sb, *sb1;
@@ -2088,6 +2066,8 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
 					printf("swblk zone ok\n");
 				break;
 			}
+			if (nowait_noreplace)
+				return (swapblk);
 			VM_OBJECT_WUNLOCK(object);
 			if (uma_zone_exhausted(swblk_zone)) {
 				if (atomic_cmpset_int(&swblk_zone_exhausted,
@@ -2118,6 +2098,10 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
 					printf("swpctrie zone ok\n");
 				break;
 			}
+			if (nowait_noreplace) {
+				uma_zfree(swblk_zone, sb);
+				return (swapblk);
+			}
 			VM_OBJECT_WUNLOCK(object);
 			if (uma_zone_exhausted(swpctrie_zone)) {
 				if (atomic_cmpset_int(&swpctrie_zone_exhausted,
@@ -2144,14 +2128,16 @@ allocated:
 	modpi = pindex % SWAP_META_PAGES;
 	/* Return prior contents of metadata. */
 	prev_swapblk = sb->d[modpi];
-	/* Enter block into metadata. */
-	sb->d[modpi] = swapblk;
+	if (!nowait_noreplace || prev_swapblk == SWAPBLK_NONE) {
+		/* Enter block into metadata. */
+		sb->d[modpi] = swapblk;
 
-	/*
-	 * Free the swblk if we end up with the empty page run.
-	 */
-	if (swapblk == SWAPBLK_NONE)
-		swp_pager_free_empty_swblk(object, sb);
+		/*
+		 * Free the swblk if we end up with the empty page run.
+		 */
+		if (swapblk == SWAPBLK_NONE)
+			swp_pager_free_empty_swblk(object, sb);
+	}
 	return (prev_swapblk);
 }
 
@@ -2168,6 +2154,7 @@ swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject,
 {
 	struct page_range range;
 	struct swblk *sb;
+	daddr_t blk;
 	vm_page_t m;
 	vm_pindex_t offset, last;
 	vm_size_t mc;
@@ -2193,12 +2180,25 @@ swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject,
 		limit = last - sb->p < SWAP_META_PAGES ? last - sb->p :
 		    SWAP_META_PAGES;
 		for (i = start; i < limit; i++) {
-			if (sb->d[i] == SWAPBLK_NONE)
+			blk = sb->d[i];
+			if (blk == SWAPBLK_NONE)
 				continue;
 			if (dstobject == NULL ||
-			    !swp_pager_xfer_source(srcobject, dstobject, 
-			    sb->p + i - offset, sb->d[i])) {
+			    (blk = swp_pager_meta_build(dstobject,
+			    sb->p + i - offset, blk, true),
+			    blk != sb->d[i] && blk != SWAPBLK_NONE))
 				swp_pager_update_freerange(&range, sb->d[i]);
+			else if (blk == sb->d[i]) {
+				/*
+				 * Destination has no swapblk and is not
+				 * resident, so transfer source.
+				 * swp_pager_meta_build() failed memory
+				 * allocation already, likely to sleep in retry.
+				 */
+				VM_OBJECT_WUNLOCK(srcobject);
+				swp_pager_meta_build(dstobject,
+				    sb->p + i - offset, blk, false);
+				VM_OBJECT_WLOCK(srcobject);
 			}
 			if (moved != NULL) {
 				if (m != NULL && m->pindex != pindex + i - 1)