git: 7d1d9cc440f8 - main - sysctl: Do not serialize requests when running as root

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Sat, 21 Dec 2024 19:26:57 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=7d1d9cc440f800858b6ec8dfb5a41c853fc8c36d

commit 7d1d9cc440f800858b6ec8dfb5a41c853fc8c36d
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-12-21 19:25:32 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-12-21 19:25:32 +0000

    sysctl: Do not serialize requests when running as root
    
    Bugs or unexpected behaviour can cause a user thread to block in a
    sysctl handler for a long time.  "procstat -kka" is the most useful tool
    for diagnosing such hangs, but it can itself block on sysctlmemlock.
    
    Since the purpose of this lock is merely to ensure userspace can't wire
    too much memory, don't require it for requests from privileged threads.
    
    PR:             282994
    Reviewed by:    kib, jhb
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D47842
---
 sys/kern/kern_sysctl.c | 10 ++++++----
 sys/sys/priv.h         |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index e139d9c39181..9d824fbd3cbd 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -2516,8 +2516,9 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
     size_t *oldlenp, int inkernel, const void *new, size_t newlen,
     size_t *retval, int flags)
 {
-	int error = 0, memlocked;
 	struct sysctl_req req;
+	int error = 0;
+	bool memlocked;
 
 	bzero(&req, sizeof req);
 
@@ -2549,9 +2550,10 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 	if (KTRPOINT(curthread, KTR_SYSCTL))
 		ktrsysctl(name, namelen);
 #endif
-	memlocked = 0;
-	if (req.oldptr && req.oldlen > 4 * PAGE_SIZE) {
-		memlocked = 1;
+	memlocked = false;
+	if (priv_check(td, PRIV_SYSCTL_MEMLOCK) != 0 &&
+	    req.oldptr != NULL && req.oldlen > 4 * PAGE_SIZE) {
+		memlocked = true;
 		sx_xlock(&sysctlmemlock);
 	}
 	CURVNET_SET(TD_TO_VNET(td));
diff --git a/sys/sys/priv.h b/sys/sys/priv.h
index b570e4d7884a..9a1886454d86 100644
--- a/sys/sys/priv.h
+++ b/sys/sys/priv.h
@@ -211,6 +211,7 @@
 #define	PRIV_SYSCTL_DEBUG	240	/* Can invoke sysctl.debug. */
 #define	PRIV_SYSCTL_WRITE	241	/* Can write sysctls. */
 #define	PRIV_SYSCTL_WRITEJAIL	242	/* Can write sysctls, jail permitted. */
+#define	PRIV_SYSCTL_MEMLOCK	243	/* Large requests are not serialized. */
 
 /*
  * TTY privileges.