git: e60f608eb9cf - main - Add sysctl kern.proc.kqueue

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Thu, 13 Mar 2025 16:10:06 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=e60f608eb9cf3b38099948545934d699de9bbcea

commit e60f608eb9cf3b38099948545934d699de9bbcea
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2025-02-23 20:25:25 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-03-13 16:09:35 +0000

    Add sysctl kern.proc.kqueue
    
    reporting registered events in the specified kqueue.
    
    Reviewed by:    markj
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D49163
---
 sys/kern/kern_event.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++-
 sys/kern/sys_pipe.c   |  20 ++++++-
 sys/kern/vfs_subr.c   |  50 ++++++++++++++++--
 sys/sys/event.h       |   5 ++
 sys/sys/sysctl.h      |   1 +
 sys/sys/user.h        |  28 ++++++++++
 sys/vm/sg_pager.c     |   1 +
 7 files changed, 238 insertions(+), 8 deletions(-)

diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index dcb2c10ee1f5..14aa3abd1901 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -64,6 +64,7 @@
 #include <sys/socketvar.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
+#include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/syscallsubr.h>
 #include <sys/taskqueue.h>
@@ -2730,8 +2731,15 @@ knote_drop_detached(struct knote *kn, struct thread *td)
 	KQ_NOTOWNED(kq);
 
 	KQ_LOCK(kq);
-	KASSERT(kn->kn_influx == 1,
-	    ("knote_drop called on %p with influx %d", kn, kn->kn_influx));
+	for (;;) {
+		KASSERT(kn->kn_influx >= 1,
+		    ("knote_drop called on %p with influx %d",
+		    kn, kn->kn_influx));
+		if (kn->kn_influx == 1)
+			break;
+		kq->kq_state |= KQ_FLUXWAIT;
+		msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
+	}
 
 	if (kn->kn_fop->f_isfd)
 		list = &kq->kq_knlist[kn->kn_id];
@@ -2829,3 +2837,132 @@ noacquire:
 	fdrop(fp, td);
 	return (error);
 }
+
+struct knote_status_export_bit {	/* one kernel-to-exported flag pair */
+	int kn_status_bit;	/* in-kernel KN_* status flag */
+	int knt_status_bit;	/* matching exported KNOTE_STATUS_* flag */
+};
+
+#define	ST(name) \
+    { .kn_status_bit = KN_##name, .knt_status_bit = KNOTE_STATUS_##name }
+static const struct knote_status_export_bit knote_status_export_bits[] = {
+	ST(ACTIVE),	/* expands to KN_ACTIVE -> KNOTE_STATUS_ACTIVE */
+	ST(QUEUED),
+	ST(DISABLED),
+	ST(DETACHED),
+	ST(KQUEUE),
+};	/* KN_* flags not listed here are not exported */
+#undef ST	/* ST() exists only to build the table above */
+
+static int
+knote_status_export(int kn_status)	/* KN_* -> KNOTE_STATUS_* bits */
+{
+	const struct knote_status_export_bit *b;
+	unsigned i;
+	int res;
+
+	res = 0;	/* KN_* bits without a table entry are not exported */
+	for (i = 0; i < nitems(knote_status_export_bits); i++) {
+		b = &knote_status_export_bits[i];
+		if ((kn_status & b->kn_status_bit) != 0)
+			res |= b->knt_status_bit;
+	}
+	return (res);	/* union of the translated bits */
+}
+
+static int	/* 0 or SYSCTL_OUT() error; kq locked on entry and exit */
+sysctl_kern_proc_kqueue_report_one(struct proc *p, struct sysctl_req *req,
+    struct kqueue *kq, struct knote *kn)
+{
+	struct kinfo_knote kin;
+	int error;
+
+	if (kn->kn_status == KN_MARKER)	/* markers are not reported */
+		return (0);
+
+	memset(&kin, 0, sizeof(kin));	/* unset fields are copied out as 0 */
+	memcpy(&kin.knt_event, &kn->kn_kevent, sizeof(struct kevent));
+	kin.knt_status = knote_status_export(kn->kn_status);
+	kn_enter_flux(kn);	/* presumably pins kn while unlocked */
+	KQ_UNLOCK_FLUX(kq);	/* the calls below run without the kq lock */
+	if (kn->kn_fop->f_userdump != NULL)	/* optional, errors ignored */
+		(void)kn->kn_fop->f_userdump(p, kn, &kin);
+	error = SYSCTL_OUT(req, &kin, sizeof(kin));
+	maybe_yield();
+	KQ_LOCK(kq);	/* retaken even when SYSCTL_OUT() failed */
+	kn_leave_flux(kn);
+	return (error);
+}
+
+static int	/* handler for kern.proc.kqueue; 0 or an errno value */
+sysctl_kern_proc_kqueue(SYSCTL_HANDLER_ARGS)
+{
+	struct thread *td;
+	struct proc *p;
+	struct file *fp;
+	struct kqueue *kq;
+	struct knote *kn;
+	int error, i, *name;
+
+	name = (int *)arg1;
+	if ((u_int)arg2 != 2)	/* name[] must be { pid, kqueue fd } */
+		return (EINVAL);
+
+	error = pget((pid_t)name[0], PGET_HOLD | PGET_CANDEBUG, &p);
+	if (error != 0)
+		return (error);
+#ifdef COMPAT_FREEBSD32
+	if (SV_CURPROC_FLAG(SV_ILP32)) {
+		/* XXXKIB: ILP32 callers get EOPNOTSUPP for now */
+		error = EOPNOTSUPP;
+		goto out1;
+	}
+#endif
+
+	td = curthread;
+	error = fget_remote(td, p, name[1] /* kqfd */, &fp);
+	if (error != 0)
+		goto out1;
+	if (fp->f_type != DTYPE_KQUEUE) {	/* fd is not a kqueue */
+		error = EINVAL;
+		goto out2;
+	}
+
+	kq = fp->f_data;
+	if (req->oldptr == NULL) {	/* no output buffer: size query */
+		error = SYSCTL_OUT(req, NULL, sizeof(struct kinfo_knote) *
+		    kq->kq_knlistsize * 11 / 10);	/* 10% slack */
+		goto out2;	/* NOTE(review): kq_knhash is not counted */
+	}
+
+	KQ_LOCK(kq);	/* report_one drops and retakes it per knote */
+	for (i = 0; i < kq->kq_knlistsize; i++) {	/* kq_knlist[] */
+		SLIST_FOREACH(kn, &kq->kq_knlist[i], kn_link) {
+			error = sysctl_kern_proc_kqueue_report_one(p, req,
+			    kq, kn);
+			if (error != 0)
+				goto out3;
+		}
+	}
+	if (kq->kq_knhashmask == 0)	/* presumably no hash table yet */
+		goto out3;
+	for (i = 0; i <= kq->kq_knhashmask; i++) {	/* kq_knhash[] */
+		SLIST_FOREACH(kn, &kq->kq_knhash[i], kn_link) {
+			error = sysctl_kern_proc_kqueue_report_one(p, req,
+			    kq, kn);
+			if (error != 0)
+				goto out3;
+		}
+	}
+out3:
+	KQ_UNLOCK_FLUX(kq);
+out2:
+	fdrop(fp, td);	/* pairs with fget_remote() */
+out1:
+	PRELE(p);	/* pairs with PGET_HOLD */
+	return (error);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_KQUEUE, kq,
+    CTLFLAG_RD | CTLFLAG_MPSAFE,
+    sysctl_kern_proc_kqueue, "KQueue events");	/* args: pid, kqueue fd */
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index b842db44e7f1..9340779918a2 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -175,21 +175,26 @@ static void	filt_pipedetach_notsup(struct knote *kn);
 static int	filt_pipenotsup(struct knote *kn, long hint);
 static int	filt_piperead(struct knote *kn, long hint);
 static int	filt_pipewrite(struct knote *kn, long hint);
+static int	filt_pipedump(struct proc *p, struct knote *kn,
+    struct kinfo_knote *kin);
 
 static const struct filterops pipe_nfiltops = {
 	.f_isfd = 1,
 	.f_detach = filt_pipedetach_notsup,
 	.f_event = filt_pipenotsup
+	/* no f_userdump: nothing extra is reported for this filter */
 };
 static const struct filterops pipe_rfiltops = {
 	.f_isfd = 1,
 	.f_detach = filt_pipedetach,
-	.f_event = filt_piperead
+	.f_event = filt_piperead,
+	.f_userdump = filt_pipedump,	/* reports the pipe inode number */
 };
 static const struct filterops pipe_wfiltops = {
 	.f_isfd = 1,
 	.f_detach = filt_pipedetach,
-	.f_event = filt_pipewrite
+	.f_event = filt_pipewrite,
+	.f_userdump = filt_pipedump,	/* reports the pipe inode number */
 };
 
 /*
@@ -1900,3 +1905,14 @@ filt_pipenotsup(struct knote *kn, long hint)
 
 	return (0);
 }
+
+static int	/* f_userdump for the pipe read/write filters */
+filt_pipedump(struct proc *p, struct knote *kn,
+    struct kinfo_knote *kin)	/* p is unused here */
+{
+	struct pipe *pipe = kn->kn_hook;
+
+	kin->knt_extdata = KNOTE_EXTDATA_PIPE;	/* knt_pipe is valid */
+	kin->knt_pipe.knt_pipe_ino = pipe->pipe_ino;
+	return (0);	/* always succeeds */
+}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 9e9b56064ecf..95ed98d3217d 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -79,6 +79,7 @@
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
+#include <sys/user.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <sys/watchdog.h>
@@ -6483,7 +6484,7 @@ const struct filterops fs_filtops = {
 	.f_isfd = 0,
 	.f_attach = filt_fsattach,
 	.f_detach = filt_fsdetach,
-	.f_event = filt_fsevent
+	.f_event = filt_fsevent,
 };
 
 static int
@@ -6559,20 +6560,26 @@ static int	filt_vfsread(struct knote *kn, long hint);
 static int	filt_vfswrite(struct knote *kn, long hint);
 static int	filt_vfsvnode(struct knote *kn, long hint);
 static void	filt_vfsdetach(struct knote *kn);
+static int	filt_vfsdump(struct proc *p, struct knote *kn,
+		    struct kinfo_knote *kin);
+
 static const struct filterops vfsread_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
-	.f_event = filt_vfsread
+	.f_event = filt_vfsread,
+	.f_userdump = filt_vfsdump,	/* reports vnode type, ids and path */
 };
 static const struct filterops vfswrite_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
-	.f_event = filt_vfswrite
+	.f_event = filt_vfswrite,
+	.f_userdump = filt_vfsdump,	/* reports vnode type, ids and path */
 };
 static const struct filterops vfsvnode_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
-	.f_event = filt_vfsvnode
+	.f_event = filt_vfsvnode,
+	.f_userdump = filt_vfsdump,	/* reports vnode type, ids and path */
 };
 
 static void
@@ -6721,6 +6728,41 @@ filt_vfsvnode(struct knote *kn, long hint)
 	return (res);
 }
 
+static int	/* f_userdump for the vnode filters; p is unused */
+filt_vfsdump(struct proc *p, struct knote *kn, struct kinfo_knote *kin)
+{
+	struct vattr va;
+	struct vnode *vp;
+	char *fullpath, *freepath;
+	int error;
+
+	kin->knt_extdata = KNOTE_EXTDATA_VNODE;	/* knt_vnode is valid */
+
+	vp = kn->kn_fp->f_vnode;
+	kin->knt_vnode.knt_vnode_type = vntype_to_kinfo(vp->v_type);
+
+	va.va_fsid = VNOVAL;
+	vn_lock(vp, LK_SHARED | LK_RETRY);	/* for VOP_GETATTR() */
+	error = VOP_GETATTR(vp, &va, curthread->td_ucred);
+	VOP_UNLOCK(vp);
+	if (error != 0)
+		return (error);	/* ids and path are left unset */
+	kin->knt_vnode.knt_vnode_fsid = va.va_fsid;
+	kin->knt_vnode.knt_vnode_fileid = va.va_fileid;
+
+	freepath = NULL;
+	fullpath = "-";	/* NOTE(review): unused if vn_fullpath() fails */
+	error = vn_fullpath(vp, &fullpath, &freepath);
+	if (error == 0) {	/* otherwise the path is left unset */
+		strlcpy(kin->knt_vnode.knt_vnode_fullpath, fullpath,
+		    sizeof(kin->knt_vnode.knt_vnode_fullpath));
+	}
+	if (freepath != NULL)
+		free(freepath, M_TEMP);
+
+	return (0);	/* a failed path lookup is not an error */
+}
+
 int
 vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off)
 {
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 1c640c86703d..dee3365ba7b6 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -262,12 +262,17 @@ struct knlist {
 #define EVENT_REGISTER	1
 #define EVENT_PROCESS	2
 
+struct kinfo_knote;
+struct proc;
+
 struct filterops {
 	int	f_isfd;		/* true if ident == filedescriptor */
 	int	(*f_attach)(struct knote *kn);
 	void	(*f_detach)(struct knote *kn);
 	int	(*f_event)(struct knote *kn, long hint);
 	void	(*f_touch)(struct knote *kn, struct kevent *kev, u_long type);
+	int	(*f_userdump)(struct proc *p, struct knote *kn,
+		    struct kinfo_knote *kin);	/* optional, may be NULL */
 };
 
 /*
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index f7abc27083aa..916c91da3d53 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -1041,6 +1041,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 #define	KERN_PROC_SIGFASTBLK	44	/* address of fastsigblk magic word */
 #define	KERN_PROC_VM_LAYOUT	45	/* virtual address space layout info */
 #define	KERN_PROC_RLIMIT_USAGE	46	/* array of rlim_t */
+#define	KERN_PROC_KQUEUE	47	/* array of struct kinfo_knote */
 
 /*
  * KERN_IPC identifiers
diff --git a/sys/sys/user.h b/sys/sys/user.h
index 02ca69968541..cf42412af66f 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -38,6 +38,7 @@
 #ifndef _KERNEL
 /* stuff that *used* to be included by user.h, or is now needed */
 #include <sys/errno.h>
+#include <sys/event.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/ucred.h>
@@ -665,6 +666,33 @@ struct kinfo_vm_layout {
 	uintptr_t	kvm_spare[12];
 };
 
+#define	KNOTE_STATUS_ACTIVE		0x00000001	/* KN_ACTIVE */
+#define	KNOTE_STATUS_QUEUED		0x00000002	/* KN_QUEUED */
+#define	KNOTE_STATUS_DISABLED		0x00000004	/* KN_DISABLED */
+#define	KNOTE_STATUS_DETACHED		0x00000008	/* KN_DETACHED */
+#define	KNOTE_STATUS_KQUEUE		0x00000010	/* KN_KQUEUE */
+
+#define	KNOTE_EXTDATA_NONE		0	/* no union member filled */
+#define	KNOTE_EXTDATA_VNODE		1	/* knt_vnode is valid */
+#define	KNOTE_EXTDATA_PIPE		2	/* knt_pipe is valid */
+
+struct kinfo_knote {	/* one record per knote from kern.proc.kqueue */
+	struct kevent	knt_event;	/* copy of the knote's kn_kevent */
+	int		knt_status;	/* KNOTE_STATUS_* bits */
+	int		knt_extdata;	/* KNOTE_EXTDATA_* union tag */
+	union {
+		struct {
+			int		knt_vnode_type;	/* vntype_to_kinfo() */
+			uint64_t	knt_vnode_fsid;	/* va_fsid */
+			uint64_t	knt_vnode_fileid;	/* va_fileid */
+			char		knt_vnode_fullpath[PATH_MAX];
+		} knt_vnode;	/* KNOTE_EXTDATA_VNODE */
+		struct {
+			ino_t		knt_pipe_ino;	/* pipe_ino */
+		} knt_pipe;	/* KNOTE_EXTDATA_PIPE */
+	};
+};
+
 #ifdef _KERNEL
 /* Flags for kern_proc_out function. */
 #define KERN_PROC_NOTHREADS	0x1
diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c
index f1f4a3763bb0..64f226dd9c58 100644
--- a/sys/vm/sg_pager.c
+++ b/sys/vm/sg_pager.c
@@ -34,6 +34,7 @@
  */
 
 #include <sys/param.h>
+#include <sys/event.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>