git: 7dd419cabc6b - main - cache: add empty path support

From: Mateusz Guzik <mjg_at_FreeBSD.org>
Date: Sat, 16 Oct 2021 20:09:16 UTC
The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=7dd419cabc6bb9e019c56d15f8e6a88ee2f46859

commit 7dd419cabc6bb9e019c56d15f8e6a88ee2f46859
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2021-09-26 13:00:24 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2021-10-16 20:08:37 +0000

    cache: add empty path support
    
    This avoids spurious drop offs as EMPTY is passed regardless of the
    actual path name.
    
    Pushign the work inside the lookup instead of just ignorign the flag
    allows avoid checking for empty pathname for all other lookups.
---
 sys/kern/kern_descrip.c |  2 +-
 sys/kern/vfs_cache.c    | 67 ++++++++++++++++++++++++++++++++++++++++++++++---
 sys/kern/vfs_lookup.c   | 18 ++++++-------
 3 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 55c2a36955a5..a7e3785bc672 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2903,7 +2903,7 @@ fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsear
 		return (EAGAIN);
 	*fsearch = ((fp->f_flag & FSEARCH) != 0);
 	vp = fp->f_vnode;
-	if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
+	if (__predict_false(vp == NULL)) {
 		return (EAGAIN);
 	}
 	if (!filecaps_copy(&fde->fde_caps, &ndp->ni_filecaps, false)) {
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index ae26dc70bd05..c1a3b0dab1e8 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -4176,9 +4176,9 @@ cache_fpl_terminated(struct cache_fpl *fpl)
 
 #define CACHE_FPL_SUPPORTED_CN_FLAGS \
 	(NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \
-	 FAILIFEXISTS | FOLLOW | LOCKSHARED | SAVENAME | SAVESTART | WILLBEDIR | \
-	 ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK | OPENREAD | \
-	 OPENWRITE)
+	 FAILIFEXISTS | FOLLOW | EMPTYPATH | LOCKSHARED | SAVENAME | SAVESTART | \
+	 WILLBEDIR | ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK | \
+	 OPENREAD | OPENWRITE)
 
 #define CACHE_FPL_INTERNAL_CN_FLAGS \
 	(ISDOTDOT | MAKEENTRY | ISLASTCN)
@@ -4197,6 +4197,7 @@ static bool
 cache_fpl_istrailingslash(struct cache_fpl *fpl)
 {
 
+	MPASS(fpl->nulchar > fpl->cnp->cn_pnbuf);
 	return (*(fpl->nulchar - 1) == '/');
 }
 
@@ -4767,6 +4768,54 @@ cache_fplookup_degenerate(struct cache_fpl *fpl)
 	return (cache_fpl_handled(fpl));
 }
 
+static int __noinline
+cache_fplookup_emptypath(struct cache_fpl *fpl)
+{
+	struct nameidata *ndp;
+	struct componentname *cnp;
+	enum vgetstate tvs;
+	struct vnode *tvp;
+	seqc_t tvp_seqc;
+	int error, lkflags;
+
+	fpl->tvp = fpl->dvp;
+	fpl->tvp_seqc = fpl->dvp_seqc;
+
+	ndp = fpl->ndp;
+	cnp = fpl->cnp;
+	tvp = fpl->tvp;
+	tvp_seqc = fpl->tvp_seqc;
+
+	MPASS(*cnp->cn_pnbuf == '\0');
+	MPASS((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) == 0);
+
+	if (__predict_false((cnp->cn_flags & EMPTYPATH) == 0)) {
+		cache_fpl_smr_exit(fpl);
+		return (cache_fpl_handled_error(fpl, ENOENT));
+	}
+
+	tvs = vget_prep_smr(tvp);
+	cache_fpl_smr_exit(fpl);
+	if (__predict_false(tvs == VGET_NONE)) {
+		return (cache_fpl_aborted(fpl));
+	}
+
+	if ((cnp->cn_flags & LOCKLEAF) != 0) {
+		lkflags = LK_SHARED;
+		if ((cnp->cn_flags & LOCKSHARED) == 0)
+			lkflags = LK_EXCLUSIVE;
+		error = vget_finish(tvp, lkflags, tvs);
+		if (__predict_false(error != 0)) {
+			return (cache_fpl_aborted(fpl));
+		}
+	} else {
+		vget_finish_ref(tvp, tvs);
+	}
+
+	ndp->ni_resflags |= NIRES_EMPTYPATH;
+	return (cache_fpl_handled(fpl));
+}
+
 static int __noinline
 cache_fplookup_noentry(struct cache_fpl *fpl)
 {
@@ -4799,6 +4848,10 @@ cache_fplookup_noentry(struct cache_fpl *fpl)
 		return (cache_fplookup_skip_slashes(fpl));
 	}
 
+	if (cnp->cn_pnbuf[0] == '\0') {
+		return (cache_fplookup_emptypath(fpl));
+	}
+
 	if (cnp->cn_nameptr[0] == '\0') {
 		if (fpl->tvp == NULL) {
 			return (cache_fplookup_degenerate(fpl));
@@ -5486,6 +5539,7 @@ cache_fplookup_parse(struct cache_fpl *fpl)
 	 *
 	 * TODO: fix this to be word-sized.
 	 */
+	MPASS(&cnp->cn_nameptr[fpl->debug.ni_pathlen - 1] >= cnp->cn_pnbuf);
 	KASSERT(&cnp->cn_nameptr[fpl->debug.ni_pathlen - 1] == fpl->nulchar,
 	    ("%s: mismatch between pathlen (%zu) and nulchar (%p != %p), string [%s]\n",
 	    __func__, fpl->debug.ni_pathlen, &cnp->cn_nameptr[fpl->debug.ni_pathlen - 1],
@@ -5739,6 +5793,13 @@ cache_fplookup_failed_vexec(struct cache_fpl *fpl, int error)
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 
+	/*
+	 * Hack: delayed empty path checking.
+	 */
+	if (cnp->cn_pnbuf[0] == '\0') {
+		return (cache_fplookup_emptypath(fpl));
+	}
+
 	/*
 	 * TODO: Due to ignoring trailing slashes lookup will perform a
 	 * permission check on the last dir when it should not be doing it.  It
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 9e10c3092f5a..8cccd93152ef 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -463,13 +463,6 @@ namei_getpath(struct nameidata *ndp)
 	if (__predict_false(error != 0))
 		return (error);
 
-	/*
-	 * Don't allow empty pathnames unless EMPTYPATH is specified.
-	 * Caller checks for ENOENT as an indication for the empty path.
-	 */
-	if (__predict_false(*cnp->cn_pnbuf == '\0'))
-		return (ENOENT);
-
 	cnp->cn_nameptr = cnp->cn_pnbuf;
 	return (0);
 }
@@ -598,8 +591,6 @@ namei(struct nameidata *ndp)
 
 	error = namei_getpath(ndp);
 	if (__predict_false(error != 0)) {
-		if (error == ENOENT && (cnp->cn_flags & EMPTYPATH) != 0) 
-			return (namei_emptypath(ndp));
 		namei_cleanup_cnp(cnp);
 		SDT_PROBE4(vfs, namei, lookup, return, error, NULL,
 		    false, ndp);
@@ -642,6 +633,15 @@ namei(struct nameidata *ndp)
 	case CACHE_FPL_STATUS_ABORTED:
 		TAILQ_INIT(&ndp->ni_cap_tracker);
 		MPASS(ndp->ni_lcf == 0);
+		if (*cnp->cn_pnbuf == '\0') {
+			if ((cnp->cn_flags & EMPTYPATH) != 0) {
+				return (namei_emptypath(ndp));
+			}
+			namei_cleanup_cnp(cnp);
+			SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL,
+			    false, ndp);
+			return (ENOENT);
+		}
 		error = namei_setup(ndp, &dp, &pwd);
 		if (error != 0) {
 			namei_cleanup_cnp(cnp);