git: f3b7dbdad53b - main - shm: Handle swap pager allocation failures

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Wed, 04 Dec 2024 18:35:17 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=f3b7dbdad53b31492757417fc1336ed74ec80fd8

commit f3b7dbdad53b31492757417fc1336ed74ec80fd8
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-12-04 01:04:33 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-12-04 16:22:50 +0000

    shm: Handle swap pager allocation failures
    
    shm_alloc() can fail if swap reservation fails (i.e., when vm.overcommit
    is non-zero) or if racct is imposing limits on swap usage.
    
    PR:             282994
    MFC after:      2 weeks
    Reviewed by:    olce, kib
    Differential Revision:  https://reviews.freebsd.org/D47839
---
 sys/kern/kern_umtx.c |  8 +++++-
 sys/kern/uipc_shm.c  | 80 ++++++++++++++++++++++++++++++++--------------------
 2 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c
index b71fa9a6de24..c4a820f41bc3 100644
--- a/sys/kern/kern_umtx.c
+++ b/sys/kern/kern_umtx.c
@@ -4485,6 +4485,7 @@ static int
 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
     struct umtx_shm_reg **res)
 {
+	struct shmfd *shm;
 	struct umtx_shm_reg *reg, *reg1;
 	struct ucred *cred;
 	int error;
@@ -4504,9 +4505,14 @@ umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
 	cred = td->td_ucred;
 	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
 		return (ENOMEM);
+	shm = shm_alloc(td->td_ucred, O_RDWR, false);
+	if (shm == NULL) {
+		chgumtxcnt(cred->cr_ruidinfo, -1, 0);
+		return (ENOMEM);
+	}
 	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
 	bcopy(key, &reg->ushm_key, sizeof(*key));
-	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
+	reg->ushm_obj = shm;
 	reg->ushm_cred = crhold(cred);
 	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
 	if (error != 0) {
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 074ca514c77d..026611a59593 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -939,22 +939,32 @@ shm_alloc(struct ucred *ucred, mode_t mode, bool largepage)
 	struct shmfd *shmfd;
 	vm_object_t obj;
 
+	if (largepage) {
+		obj = phys_pager_allocate(NULL, &shm_largepage_phys_ops,
+		    NULL, 0, VM_PROT_DEFAULT, 0, ucred);
+	} else {
+		obj = vm_pager_allocate(shmfd_pager_type, NULL, 0,
+		    VM_PROT_DEFAULT, 0, ucred);
+	}
+	if (obj == NULL) {
+		/*
+		 * swap reservation limits can cause object allocation
+		 * to fail.
+		 */
+		return (NULL);
+	}
+
 	shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
-	shmfd->shm_size = 0;
 	shmfd->shm_uid = ucred->cr_uid;
 	shmfd->shm_gid = ucred->cr_gid;
 	shmfd->shm_mode = mode;
 	if (largepage) {
-		obj = phys_pager_allocate(NULL, &shm_largepage_phys_ops,
-		    NULL, shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred);
 		obj->un_pager.phys.phys_priv = shmfd;
 		shmfd->shm_lp_alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT;
 	} else {
-		obj = vm_pager_allocate(shmfd_pager_type, NULL,
-		    shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred);
 		obj->un_pager.swp.swp_priv = shmfd;
 	}
-	KASSERT(obj != NULL, ("shm_create: vm_pager_allocate"));
+
 	VM_OBJECT_WLOCK(obj);
 	vm_object_set_flag(obj, OBJ_POSIXSHM);
 	VM_OBJECT_WUNLOCK(obj);
@@ -1211,8 +1221,8 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
 		if (CAP_TRACING(td))
 			ktrcapfail(CAPFAIL_NAMEI, path);
 		if (IN_CAPABILITY_MODE(td)) {
-			free(path, M_SHMFD);
-			return (ECAPMODE);
+			error = ECAPMODE;
+			goto outnofp;
 		}
 #endif
 
@@ -1232,20 +1242,21 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
 	 * in sys_shm_open() to keep this implementation compliant.
 	 */
 	error = falloc_caps(td, &fp, &fd, flags & O_CLOEXEC, fcaps);
-	if (error) {
-		free(path, M_SHMFD);
-		return (error);
-	}
+	if (error != 0)
+		goto outnofp;
 
 	/* A SHM_ANON path pointer creates an anonymous object. */
 	if (userpath == SHM_ANON) {
 		/* A read-only anonymous object is pointless. */
 		if ((flags & O_ACCMODE) == O_RDONLY) {
-			fdclose(td, fp, fd);
-			fdrop(fp, td);
-			return (EINVAL);
+			error = EINVAL;
+			goto out;
 		}
 		shmfd = shm_alloc(td->td_ucred, cmode, largepage);
+		if (shmfd == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
 		shmfd->shm_seals = initial_seals;
 		shmfd->shm_flags = shmflags;
 	} else {
@@ -1262,17 +1273,26 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
 #endif
 					shmfd = shm_alloc(td->td_ucred, cmode,
 					    largepage);
-					shmfd->shm_seals = initial_seals;
-					shmfd->shm_flags = shmflags;
-					shm_insert(path, fnv, shmfd);
+					if (shmfd == NULL) {
+						error = ENOMEM;
+					} else {
+						shmfd->shm_seals =
+						    initial_seals;
+						shmfd->shm_flags = shmflags;
+						shm_insert(path, fnv, shmfd);
+						path = NULL;
+					}
 #ifdef MAC
 				}
 #endif
 			} else {
-				free(path, M_SHMFD);
 				error = ENOENT;
 			}
 		} else {
+			/*
+			 * Object already exists, obtain a new reference if
+			 * requested and permitted.
+			 */
 			rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX);
 
 			/*
@@ -1285,12 +1305,6 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
 			 */
 			initial_seals &= ~shmfd->shm_seals;
 
-			/*
-			 * Object already exists, obtain a new
-			 * reference if requested and permitted.
-			 */
-			free(path, M_SHMFD);
-
 			/*
 			 * initial_seals can't set additional seals if we've
 			 * already been set F_SEAL_SEAL.  If F_SEAL_SEAL is set,
@@ -1349,19 +1363,25 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
 		}
 		sx_xunlock(&shm_dict_lock);
 
-		if (error) {
-			fdclose(td, fp, fd);
-			fdrop(fp, td);
-			return (error);
-		}
+		if (error != 0)
+			goto out;
 	}
 
 	finit(fp, FFLAGS(flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);
 
 	td->td_retval[0] = fd;
 	fdrop(fp, td);
+	free(path, M_SHMFD);
 
 	return (0);
+
+out:
+	fdclose(td, fp, fd);
+	fdrop(fp, td);
+outnofp:
+	free(path, M_SHMFD);
+
+	return (error);
 }
 
 /* System calls. */