git: 6d2a673f7ec5 - stable/14 - ktrace: Record namei violations with KTR_CAPFAIL

From: Jake Freeland <jfree_at_FreeBSD.org>
Date: Sun, 12 May 2024 00:08:30 UTC
The branch stable/14 has been updated by jfree:

URL: https://cgit.FreeBSD.org/src/commit/?id=6d2a673f7ec5d337c5af253f54a614ac6176f89f

commit 6d2a673f7ec5d337c5af253f54a614ac6176f89f
Author:     Jake Freeland <jfree@FreeBSD.org>
AuthorDate: 2024-04-06 18:31:25 +0000
Commit:     Jake Freeland <jfree@FreeBSD.org>
CommitDate: 2024-05-11 23:57:44 +0000

    ktrace: Record namei violations with KTR_CAPFAIL
    
    Report namei path lookups with CAPFAIL_NAMEI while tracing Capsicum
    violations. The vfs cache lookup (cache_fplookup) is also skipped
    while tracing, to mimic capability mode behavior.
    
    Reviewed by:    markj
    Approved by:    markj (mentor)
    MFC after:      1 month
    Differential Revision:  https://reviews.freebsd.org/D40680
    
    (cherry picked from commit 0cd9cde767c32780df9abee7ba9d2a8a51536728)
---
 sys/kern/kern_descrip.c  |  4 +--
 sys/kern/kern_exec.c     |  2 ++
 sys/kern/uipc_shm.c      |  8 +++--
 sys/kern/uipc_syscalls.c | 16 ++++++---
 sys/kern/vfs_cache.c     |  2 +-
 sys/kern/vfs_lookup.c    | 90 ++++++++++++++++++++++++++----------------------
 sys/sys/namei.h          | 11 +++++-
 7 files changed, 81 insertions(+), 52 deletions(-)

diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index b72ae27e2a0b..35e9afea4625 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -3056,7 +3056,7 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
 	    ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
 	    ndp->ni_filecaps.fc_nioctls != -1) {
 #ifdef notyet
-		ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
+		ndp->ni_lcf |= NI_LCF_STRICTREL;
 #else
 		return (EAGAIN);
 #endif
@@ -3148,7 +3148,7 @@ fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp)
 	if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) ||
 	    ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
 	    ndp->ni_filecaps.fc_nioctls != -1) {
-		ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
+		ndp->ni_lcf |= NI_LCF_STRICTREL;
 		ndp->ni_resflags |= NIRES_STRICTREL;
 	}
 #endif
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index ae2b624c2659..6727872b5b10 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -454,6 +454,8 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
 interpret:
 	if (args->fname != NULL) {
 #ifdef CAPABILITY_MODE
+		if (CAP_TRACING(td))
+			ktrcapfail(CAPFAIL_NAMEI, args->fname);
 		/*
 		 * While capability mode can't reach this point via direct
 		 * path arguments to execve(), we also don't allow
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 1b9434aa438c..613656ca0a5a 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -1176,8 +1176,12 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
 	/*
 	 * shm_open(2) is only allowed for anonymous objects.
 	 */
-	if (IN_CAPABILITY_MODE(td) && (userpath != SHM_ANON))
-		return (ECAPMODE);
+	if (userpath != SHM_ANON) {
+		if (CAP_TRACING(td))
+			ktrcapfail(CAPFAIL_NAMEI, userpath);
+		if (IN_CAPABILITY_MODE(td))
+			return (ECAPMODE);
+	}
 #endif
 
 	AUDIT_ARG_FFLAGS(flags);
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 85b2214eaeb9..70a7ebbee4ed 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -201,8 +201,12 @@ kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 	int error;
 
 #ifdef CAPABILITY_MODE
-	if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD))
-		return (ECAPMODE);
+	if (dirfd == AT_FDCWD) {
+		if (CAP_TRACING(td))
+			ktrcapfail(CAPFAIL_NAMEI, "AT_FDCWD");
+		if (IN_CAPABILITY_MODE(td))
+			return (ECAPMODE);
+	}
 #endif
 
 	AUDIT_ARG_FD(fd);
@@ -487,8 +491,12 @@ kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 	int error;
 
 #ifdef CAPABILITY_MODE
-	if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD))
-		return (ECAPMODE);
+	if (dirfd == AT_FDCWD) {
+		if (CAP_TRACING(td))
+			ktrcapfail(CAPFAIL_NAMEI, "AT_FDCWD");
+		if (IN_CAPABILITY_MODE(td))
+			return (ECAPMODE);
+	}
 #endif
 
 	AUDIT_ARG_FD(fd);
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index e3ab80f94482..70c4dbc01c98 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -4396,7 +4396,7 @@ cache_can_fplookup(struct cache_fpl *fpl)
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
-	if (IN_CAPABILITY_MODE(td)) {
+	if (IN_CAPABILITY_MODE(td) || CAP_TRACING(td)) {
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 6c83746eaf8b..600af6d486eb 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -238,14 +238,17 @@ nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
 	struct mount *mp;
 
 	if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf &
-	    NI_LCF_STRICTRELATIVE) == 0)
+	    NI_LCF_STRICTREL) == 0)
 		return (0);
+	if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR |
+	    NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR))
+		NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf);
 	if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0)
 		return (ENOTCAPABLE);
 	mp = dp->v_mount;
 	if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL &&
 	    (mp->mnt_flag & MNT_LOCAL) == 0)
-		return (ENOTCAPABLE);
+		goto capfail;
 	TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head,
 	    nm_link) {
 		if (dp == nt->dp) {
@@ -255,6 +258,10 @@ nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
 			return (0);
 		}
 	}
+
+capfail:
+	if (__predict_false((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0))
+		NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf);
 	return (ENOTCAPABLE);
 }
 
@@ -273,12 +280,12 @@ namei_handle_root(struct nameidata *ndp, struct vnode **dpp)
 	struct componentname *cnp;
 
 	cnp = &ndp->ni_cnd;
-	if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) {
-#ifdef KTRACE
-		if (KTRPOINT(curthread, KTR_CAPFAIL))
-			ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
-#endif
-		return (ENOTCAPABLE);
+	if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL |
+	    NI_LCF_STRICTREL_KTR)) != 0)) {
+		if ((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0)
+			NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf);
+		if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0)
+			return (ENOTCAPABLE);
 	}
 	while (*(cnp->cn_nameptr) == '/') {
 		cnp->cn_nameptr++;
@@ -319,15 +326,17 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
 	 *   previously walked by us, which prevents an escape from
 	 *   the relative root.
 	 */
-	if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) {
-		ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
-		ndp->ni_resflags |= NIRES_STRICTREL;
-		if (ndp->ni_dirfd == AT_FDCWD) {
-#ifdef KTRACE
-			if (KTRPOINT(td, KTR_CAPFAIL))
-				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
-#endif
-			return (ECAPMODE);
+	if ((cnp->cn_flags & NOCAPCHECK) == 0) {
+		if (CAP_TRACING(td)) {
+			ndp->ni_lcf |= NI_LCF_STRICTREL_KTR;
+			if (ndp->ni_dirfd == AT_FDCWD)
+				NI_CAP_VIOLATION(ndp, "AT_FDCWD");
+		}
+		if (IN_CAPABILITY_MODE(td)) {
+			ndp->ni_lcf |= NI_LCF_STRICTREL;
+			ndp->ni_resflags |= NIRES_STRICTREL;
+			if (ndp->ni_dirfd == AT_FDCWD)
+				return (ECAPMODE);
 		}
 	}
 #endif
@@ -370,8 +379,8 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
 	if (error == 0 && (cnp->cn_flags & RBENEATH) != 0) {
 		if (cnp->cn_pnbuf[0] == '/') {
 			error = ENOTCAPABLE;
-		} else if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) {
-			ndp->ni_lcf |= NI_LCF_STRICTRELATIVE |
+		} else if ((ndp->ni_lcf & NI_LCF_STRICTREL) == 0) {
+			ndp->ni_lcf |= NI_LCF_STRICTREL |
 			    NI_LCF_CAP_DOTDOT;
 		}
 	}
@@ -393,9 +402,12 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
 		pwd_drop(pwd);
 		return (error);
 	}
-	if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 &&
-	    lookup_cap_dotdot != 0)
-		ndp->ni_lcf |= NI_LCF_CAP_DOTDOT;
+	if (lookup_cap_dotdot != 0) {
+		if ((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0)
+			ndp->ni_lcf |= NI_LCF_CAP_DOTDOT_KTR;
+		if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0)
+			ndp->ni_lcf |= NI_LCF_CAP_DOTDOT;
+	}
 	SDT_PROBE4(vfs, namei, lookup, entry, *dpp, cnp->cn_pnbuf,
 	    cnp->cn_flags, false);
 	*pwdp = pwd;
@@ -1170,12 +1182,11 @@ dirloop:
 	 *    result of dotdot lookup.
 	 */
 	if (cnp->cn_flags & ISDOTDOT) {
-		if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT))
-		    == NI_LCF_STRICTRELATIVE) {
-#ifdef KTRACE
-			if (KTRPOINT(curthread, KTR_CAPFAIL))
-				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
-#endif
+		if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR |
+		    NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR))
+			NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf);
+		if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL |
+		    NI_LCF_CAP_DOTDOT)) == NI_LCF_STRICTREL)) {
 			error = ENOTCAPABLE;
 			goto bad;
 		}
@@ -1192,10 +1203,14 @@ dirloop:
 			bool isroot = dp == ndp->ni_rootdir ||
 			    dp == ndp->ni_topdir || dp == rootvnode ||
 			    pr != NULL;
-			if (isroot && (ndp->ni_lcf &
-			    NI_LCF_STRICTRELATIVE) != 0) {
-				error = ENOTCAPABLE;
-				goto capdotdot;
+			if (__predict_false(isroot && (ndp->ni_lcf &
+			    (NI_LCF_STRICTREL | NI_LCF_STRICTREL_KTR)) != 0)) {
+				if ((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0)
+					NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf);
+				if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0) {
+					error = ENOTCAPABLE;
+					goto capdotdot;
+				}
 			}
 			if (isroot || ((dp->v_vflag & VV_ROOT) != 0 &&
 			    (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
@@ -1220,10 +1235,6 @@ dirloop:
 			error = nameicap_check_dotdot(ndp, dp);
 			if (error != 0) {
 capdotdot:
-#ifdef KTRACE
-				if (KTRPOINT(curthread, KTR_CAPFAIL))
-					ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
-#endif
 				goto bad;
 			}
 		}
@@ -1376,13 +1387,8 @@ nextname:
 	}
 	if (cnp->cn_flags & ISDOTDOT) {
 		error = nameicap_check_dotdot(ndp, ndp->ni_vp);
-		if (error != 0) {
-#ifdef KTRACE
-			if (KTRPOINT(curthread, KTR_CAPFAIL))
-				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
-#endif
+		if (error != 0)
 			goto bad2;
-		}
 	}
 	if (*ndp->ni_next == '/') {
 		cnp->cn_nameptr = ndp->ni_next;
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index 8ffa87aa3d7e..d464423c025c 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -198,8 +198,17 @@ int	cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
 /*
  * Flags in ni_lcf, valid for the duration of the namei call.
  */
-#define	NI_LCF_STRICTRELATIVE	0x0001	/* relative lookup only */
+#define	NI_LCF_STRICTREL	0x0001	/* relative lookup only */
 #define	NI_LCF_CAP_DOTDOT	0x0002	/* ".." in strictrelative case */
+/* Track capability restrictions separately for violation ktracing. */
+#define	NI_LCF_STRICTREL_KTR	0x0004	/* trace relative lookups */
+#define	NI_LCF_CAP_DOTDOT_KTR	0x0008	/* trace ".." in strictrelative case */
+#define	NI_LCF_KTR_FLAGS	(NI_LCF_STRICTREL_KTR | NI_LCF_CAP_DOTDOT_KTR)
+
+#define	NI_CAP_VIOLATION(ndp, path)	do {			\
+	ktrcapfail(CAPFAIL_NAMEI, (path));			\
+	(ndp)->ni_lcf &= ~NI_LCF_KTR_FLAGS;			\
+} while (0)
 
 /*
  * Initialization of a nameidata structure.