git: d37286b9bf92 - main - proc: Remove kernel stack swapping support, part 7

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 29 Jul 2024 01:49:59 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=d37286b9bf92ec923ab6823bbedef9e39e7e1ebb

commit d37286b9bf92ec923ab6823bbedef9e39e7e1ebb
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-07-29 01:41:06 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-07-29 01:43:49 +0000

    proc: Remove kernel stack swapping support, part 7
    
    Remove some uses of PHOLD which were there only to prevent the process'
    threads from being swapped out.
    
    Tested by:      pho
    Reviewed by:    imp, kib
    Differential Revision:  https://reviews.freebsd.org/D46118
---
 sys/cam/cam_periph.c                            | 14 --------------
 sys/compat/linuxkpi/common/src/linux_schedule.c |  6 ------
 sys/dev/nvme/nvme_ctrlr.c                       | 12 ------------
 sys/kern/kern_physio.c                          |  7 -------
 sys/kern/kern_procctl.c                         | 10 ----------
 sys/kern/subr_firmware.c                        |  2 --
 sys/ufs/ffs/ffs_rawread.c                       |  6 ------
 sys/ufs/ffs/ffs_softdep.c                       |  4 ----
 8 files changed, 61 deletions(-)

diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c
index e957edee67f1..4052ec2e1570 100644
--- a/sys/cam/cam_periph.c
+++ b/sys/cam/cam_periph.c
@@ -928,16 +928,6 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
 		}
 	}
 
-	/*
-	 * This keeps the kernel stack of current thread from getting
-	 * swapped.  In low-memory situations where the kernel stack might
-	 * otherwise get swapped out, this holds it and allows the thread
-	 * to make progress and release the kernel mapped pages sooner.
-	 *
-	 * XXX KDM should I use P_NOSWAP instead?
-	 */
-	PHOLD(curproc);
-
 	for (i = 0; i < numbufs; i++) {
 		/* Save the user's data address. */
 		mapinfo->orig[i] = *data_ptrs[i];
@@ -1005,7 +995,6 @@ fail:
 			free(*data_ptrs[i], M_CAMPERIPH);
 		*data_ptrs[i] = mapinfo->orig[i];
 	}
-	PRELE(curproc);
 	return(EACCES);
 }
 
@@ -1116,9 +1105,6 @@ cam_periph_unmapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
 		*data_ptrs[i] = mapinfo->orig[i];
 	}
 
-	/* allow ourselves to be swapped once again */
-	PRELE(curproc);
-
 	return (error);
 }
 
diff --git a/sys/compat/linuxkpi/common/src/linux_schedule.c b/sys/compat/linuxkpi/common/src/linux_schedule.c
index 66b339bfbdbd..3349a4aa8d72 100644
--- a/sys/compat/linuxkpi/common/src/linux_schedule.c
+++ b/sys/compat/linuxkpi/common/src/linux_schedule.c
@@ -268,11 +268,6 @@ linux_wait_event_common(wait_queue_head_t *wqh, wait_queue_t *wq, int timeout,
 
 	task = current;
 
-	/*
-	 * Our wait queue entry is on the stack - make sure it doesn't
-	 * get swapped out while we sleep.
-	 */
-	PHOLD(task->task_thread->td_proc);
 	sleepq_lock(task);
 	if (atomic_read(&task->state) != TASK_WAKING) {
 		ret = linux_add_to_sleepqueue(task, task, "wevent", timeout,
@@ -281,7 +276,6 @@ linux_wait_event_common(wait_queue_head_t *wqh, wait_queue_t *wq, int timeout,
 		sleepq_release(task);
 		ret = 0;
 	}
-	PRELE(task->task_thread->td_proc);
 
 	if (lock != NULL)
 		spin_lock_irq(lock);
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index e07ac737ffc8..52834cb26b8e 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -1259,11 +1259,6 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
 			return EIO;
 		}
 		if (is_user_buffer) {
-			/*
-			 * Ensure the user buffer is wired for the duration of
-			 *  this pass-through command.
-			 */
-			PHOLD(curproc);
 			buf = uma_zalloc(pbuf_zone, M_WAITOK);
 			buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
 			if (vmapbuf(buf, pt->buf, pt->len, 1) < 0) {
@@ -1309,7 +1304,6 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
 		vunmapbuf(buf);
 err:
 		uma_zfree(pbuf_zone, buf);
-		PRELE(curproc);
 	}
 
 	return (ret);
@@ -1356,11 +1350,6 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
 		if ((npc->opcode & 0x3) == 0 || (npc->opcode & 0x3) == 3)
 			return (EINVAL);
 		if (is_user) {
-			/*
-			 * Ensure the user buffer is wired for the duration of
-			 *  this pass-through command.
-			 */
-			PHOLD(curproc);
 			buf = uma_zalloc(pbuf_zone, M_WAITOK);
 			buf->b_iocmd = npc->opcode & 1 ? BIO_WRITE : BIO_READ;
 			if (vmapbuf(buf, (void *)(uintptr_t)npc->addr,
@@ -1408,7 +1397,6 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
 		vunmapbuf(buf);
 err:
 		uma_zfree(pbuf_zone, buf);
-		PRELE(curproc);
 	}
 
 	return (ret);
diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c
index 79b7694192e4..787505c1096f 100644
--- a/sys/kern/kern_physio.c
+++ b/sys/kern/kern_physio.c
@@ -87,12 +87,6 @@ physio(struct cdev *dev, struct uio *uio, int ioflag)
 		return (EFBIG);
 	}
 
-	/*
-	 * Keep the process UPAGES from being swapped.  Processes swapped
-	 * out while holding pbufs, used by swapper, may lead to deadlock.
-	 */
-	PHOLD(curproc);
-
 	bp = g_alloc_bio();
 	if (uio->uio_segflg != UIO_USERSPACE) {
 		pbuf = NULL;
@@ -209,6 +203,5 @@ doerror:
 	else if (pages)
 		free(pages, M_DEVBUF);
 	g_destroy_bio(bp);
-	PRELE(curproc);
 	return (error);
 }
diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c
index 888feb94bb8d..e7519f1b0de4 100644
--- a/sys/kern/kern_procctl.c
+++ b/sys/kern/kern_procctl.c
@@ -573,17 +573,7 @@ reap_kill(struct thread *td, struct proc *p, void *data)
 		w.rk = rk;
 		w.error = &error;
 		TASK_INIT(&w.t, 0, reap_kill_proc_work, &w);
-
-		/*
-		 * Prevent swapout, since w, ksi, and possibly rk, are
-		 * allocated on the stack.  We sleep in
-		 * reap_kill_subtree_once() waiting for task to
-		 * complete single-threading.
-		 */
-		PHOLD(td->td_proc);
-
 		reap_kill_subtree(td, p, reaper, &w);
-		PRELE(td->td_proc);
 		crfree(w.cr);
 	}
 	PROC_LOCK(p);
diff --git a/sys/kern/subr_firmware.c b/sys/kern/subr_firmware.c
index 99f7ef6ff8b9..d616339f2c07 100644
--- a/sys/kern/subr_firmware.c
+++ b/sys/kern/subr_firmware.c
@@ -419,9 +419,7 @@ firmware_get_flags(const char *imagename, uint32_t flags)
 		fwli.flags = flags;
 		TASK_INIT(&fwload_task, 0, loadimage, (void *)&fwli);
 		taskqueue_enqueue(firmware_tq, &fwload_task);
-		PHOLD(curproc);
 		msleep((void *)&fwli, &firmware_mtx, 0, "fwload", 0);
-		PRELE(curproc);
 	}
 	/*
 	 * After attempting to load the module, see if the image is registered.
diff --git a/sys/ufs/ffs/ffs_rawread.c b/sys/ufs/ffs/ffs_rawread.c
index 3a415d766303..9db0bee0d66d 100644
--- a/sys/ufs/ffs/ffs_rawread.c
+++ b/sys/ufs/ffs/ffs_rawread.c
@@ -265,11 +265,6 @@ ffs_rawread_main(struct vnode *vp,
 	resid = uio->uio_resid;
 	offset = uio->uio_offset;
 
-	/*
-	 * keep the process from being swapped
-	 */
-	PHOLD(td->td_proc);
-
 	error = 0;
 	nerror = 0;
 
@@ -389,7 +384,6 @@ ffs_rawread_main(struct vnode *vp,
 
 	if (error == 0)
 		error = nerror;
-	PRELE(td->td_proc);
 	uio->uio_iov->iov_base = udata;
 	uio->uio_resid = resid;
 	uio->uio_offset = offset;
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 5c8e2b6cde81..86c90000e8d2 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1904,7 +1904,6 @@ process_worklist_item(struct mount *mp,
 	 */
 	if (curthread->td_pflags & TDP_COWINPROGRESS)
 		return (-1);
-	PHOLD(curproc);	/* Don't let the stack go away. */
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	matchcnt = 0;
@@ -1977,7 +1976,6 @@ process_worklist_item(struct mount *mp,
 		ump->softdep_worklist_tail =
 		    (struct worklist *)sentinel.wk_list.le_prev;
 	LIST_REMOVE(&sentinel, wk_list);
-	PRELE(curproc);
 	return (matchcnt);
 }
 
@@ -10230,7 +10228,6 @@ softdep_disk_io_initiation(
 		return;
 
 	marker.wk_type = D_LAST + 1;	/* Not a normal workitem */
-	PHOLD(curproc);			/* Don't swap out kernel stack */
 	ACQUIRE_LOCK(ump);
 	/*
 	 * Do any necessary pre-I/O processing.
@@ -10315,7 +10312,6 @@ softdep_disk_io_initiation(
 		}
 	}
 	FREE_LOCK(ump);
-	PRELE(curproc);			/* Allow swapout of kernel stack */
 }
 
 /*