git: 2ec2ba7e232d - main - vfs: Add VFS/syscall support for Solaris style extended attributes

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Wed, 02 Apr 2025 20:47:22 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=2ec2ba7e232dd126df0617194fd07be78c7a2ab9

commit 2ec2ba7e232dd126df0617194fd07be78c7a2ab9
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2025-04-02 20:45:14 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2025-04-02 20:45:14 +0000

    vfs: Add VFS/syscall support for Solaris style extended attributes
    
    Some systems, such as Solaris, represent extended attributes as
    a set of files in a directory associated with a file object.  This
    allows extended attributes to be acquired/modified via regular
    file system operations, such as read(2), write(2), lseek(2) and
    ftruncate(2).
    
    Since ZFS already has the capability to do this, this patch allows
    system calls (and the NFSv4 client/server) such access to extended
    attributes.
    This permits handling of large extended attributes and allows the NFSv4
    server to provide the service to NFSv4 clients that want it, such as
    Windows, MacOS and Solaris.
    
    The top level syscall change is a new open(2)/openat(2) flag I called
    O_NAMEDATTR that allows the named attribute directory or any attribute
    within that directory to be opened.
    
    The patch defines two new v_irflag flags called VIRF_NAMEDDIR and
    VIRF_NAMEDATTR to indicate that the vnode is for this alternate name
    space and not a normal file object.
    The patch also defines flags (OPENNAMED and CREATENAMED) that pass this
    new case down into VOP_LOOKUP(), and a mount flag (MNT_NAMEDATTR) for
    file systems that support named attributes.
    
    Most of the code in this patch is to avoid creation of links, symlinks
    or non-regular file objects in the named attribute directory.
    
    It also must avoid using the name cache, since the named attribute
    directory is associated with the same name as the file object.
    
    Man page updates will be done as separate commits.
    
    Reviewed by:    kib
    Differential Revision:  https://reviews.freebsd.org/D49583
---
 sys/kern/vfs_cache.c    |  6 +++++-
 sys/kern/vfs_default.c  |  1 +
 sys/kern/vfs_lookup.c   |  1 +
 sys/kern/vfs_subr.c     |  1 +
 sys/kern/vfs_syscalls.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 sys/kern/vfs_vnops.c    | 39 ++++++++++++++++++++++++++++++++++++++-
 sys/sys/fcntl.h         |  1 +
 sys/sys/mount.h         |  4 +++-
 sys/sys/namei.h         |  5 +++--
 sys/sys/unistd.h        |  1 +
 sys/sys/vnode.h         |  9 ++++++---
 11 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index c55e103a1a37..e96724b928a6 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -4461,7 +4461,7 @@ cache_fpl_terminated(struct cache_fpl *fpl)
 	(NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \
 	 FAILIFEXISTS | FOLLOW | EMPTYPATH | LOCKSHARED | ISRESTARTED | WILLBEDIR | \
 	 ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK | OPENREAD | \
-	 OPENWRITE | WANTIOCTLCAPS)
+	 OPENWRITE | WANTIOCTLCAPS | OPENNAMED)
 
 #define CACHE_FPL_INTERNAL_CN_FLAGS \
 	(ISDOTDOT | MAKEENTRY | ISLASTCN)
@@ -4525,6 +4525,10 @@ cache_can_fplookup(struct cache_fpl *fpl)
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
+	if ((cnp->cn_flags & OPENNAMED) != 0) {
+		cache_fpl_aborted_early(fpl);
+		return (false);
+	}
 	return (true);
 }
 
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 5e6516921002..9f150570945b 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -451,6 +451,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap)
 		case _PC_DEALLOC_PRESENT:
 		case _PC_INF_PRESENT:
 		case _PC_MAC_PRESENT:
+		case _PC_NAMEDATTR_ENABLED:
 			*ap->a_retval = 0;
 			return (0);
 		default:
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 290834c10be5..127d068f1fff 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -381,6 +381,7 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
 			error = fgetvp_lookup(ndp, dpp);
 		}
 		if (error == 0 && (*dpp)->v_type != VDIR &&
+		    (cnp->cn_flags & OPENNAMED) == 0 &&
 		    (cnp->cn_pnbuf[0] != '\0' ||
 		    (cnp->cn_flags & EMPTYPATH) == 0))
 			error = ENOTDIR;
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 96e8bb765972..c8ccbc18a7fe 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -4774,6 +4774,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	MNT_FLAG(MNT_FORCE);
 	MNT_FLAG(MNT_SNAPSHOT);
 	MNT_FLAG(MNT_BYFSID);
+	MNT_FLAG(MNT_NAMEDATTR);
 #undef MNT_FLAG
 	if (mflags != 0) {
 		if (buf[0] != '\0')
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index a6c216d61d8c..7096c4b34d5b 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1435,6 +1435,10 @@ restart:
 			vput(nd.ni_dvp);
 		vrele(vp);
 		return (EEXIST);
+	} else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
+		NDFREE_PNBUF(&nd);
+		vput(nd.ni_dvp);
+		return (EINVAL);
 	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (mode & ALLPERMS) &
@@ -1543,6 +1547,11 @@ restart:
 		vrele(nd.ni_vp);
 		return (EEXIST);
 	}
+	if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
+		NDFREE_PNBUF(&nd);
+		vput(nd.ni_dvp);
+		return (EINVAL);
+	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		vput(nd.ni_dvp);
@@ -1688,6 +1697,10 @@ kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
 		vrele(vp);
 		return (EPERM);		/* POSIX */
 	}
+	if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) {
+		vrele(vp);
+		return (EINVAL);
+	}
 	NDINIT_ATRIGHTS(&nd, CREATE,
 	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
 	    &cap_linkat_target_rights);
@@ -1829,6 +1842,10 @@ restart:
 			goto out;
 		goto restart;
 	}
+	if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
+		error = EINVAL;
+		goto out;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask;
 #ifdef MAC
@@ -3721,6 +3738,7 @@ kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
 	struct nameidata fromnd, tond;
 	uint64_t tondflags;
 	int error;
+	short irflag;
 
 again:
 	bwillwrite();
@@ -3773,6 +3791,12 @@ again:
 			return (error);
 		goto again;
 	}
+	irflag = vn_irflag_read(fvp);
+	if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) ||
+	    (irflag & VIRF_NAMEDDIR) != 0) {
+		error = EINVAL;
+		goto out;
+	}
 	if (tvp != NULL) {
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = ENOTDIR;
@@ -3892,6 +3916,10 @@ restart:
 			return (error);
 		goto restart;
 	}
+	if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
+		error = EINVAL;
+		goto out;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask;
@@ -4629,6 +4657,7 @@ kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags)
 	struct file *fp;
 	int fmode, error;
 	int indx;
+	bool named_attr;
 
 	error = priv_check(td, PRIV_VFS_FHOPEN);
 	if (error != 0)
@@ -4651,6 +4680,19 @@ kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags)
 	if (error != 0)
 		return (error);
 
+	/*
+	 * Check to see if the file handle refers to a named attribute
+	 * directory or attribute.  If it does, the O_NAMEDATTR flag
+	 * must have been specified.
+	 */
+	named_attr = (vn_irflag_read(vp) &
+	    (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0;
+	if ((named_attr && (fmode & O_NAMEDATTR) == 0) ||
+	    (!named_attr && (fmode & O_NAMEDATTR) != 0)) {
+		vput(vp);
+		return (ENOATTR);
+	}
+
 	error = falloc_noinstall(td, &fp);
 	if (error != 0) {
 		vput(vp);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 69d8ffcdd5ad..c448d62e9920 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -208,6 +208,11 @@ open2nameif(int fmode, u_int vn_open_flags)
 		res |= OPENREAD;
 	if ((fmode & FWRITE) != 0)
 		res |= OPENWRITE;
+	if ((fmode & O_NAMEDATTR) != 0) {
+		res |= OPENNAMED;
+		if ((fmode & O_CREAT) != 0)
+			res |= CREATENAMED;
+	}
 	if ((vn_open_flags & VN_OPEN_NOAUDIT) == 0)
 		res |= AUDITVNODE1;
 	if ((vn_open_flags & VN_OPEN_NOCAPCHECK) != 0)
@@ -261,6 +266,19 @@ restart:
 		if ((error = namei(ndp)) != 0)
 			return (error);
 		if (ndp->ni_vp == NULL) {
+			if ((fmode & O_NAMEDATTR) != 0) {
+				if ((ndp->ni_dvp->v_mount->mnt_flag &
+				     MNT_NAMEDATTR) == 0)
+					error = EINVAL;
+				else if ((vn_irflag_read(ndp->ni_dvp) &
+				     VIRF_NAMEDDIR) == 0)
+					error = ENOENT;
+				if (error != 0) {
+					vp = ndp->ni_dvp;
+					ndp->ni_dvp = NULL;
+					goto bad;
+				}
+			}
 			VATTR_NULL(vap);
 			vap->va_type = VREG;
 			vap->va_mode = cmode;
@@ -315,7 +333,21 @@ restart:
 				error = EEXIST;
 				goto bad;
 			}
-			if (vp->v_type == VDIR) {
+			if ((fmode & O_NAMEDATTR) != 0) {
+				short irflag;
+
+				irflag = vn_irflag_read(vp);
+				if ((vp->v_mount->mnt_flag &
+				     MNT_NAMEDATTR) == 0 ||
+				    ((irflag & VIRF_NAMEDATTR) != 0 &&
+				    vp->v_type != VREG))
+					error = EINVAL;
+				else if ((irflag & (VIRF_NAMEDDIR |
+				    VIRF_NAMEDATTR)) == 0)
+					error = ENOATTR;
+				if (error != 0)
+					goto bad;
+			} else if (vp->v_type == VDIR) {
 				error = EISDIR;
 				goto bad;
 			}
@@ -331,6 +363,11 @@ restart:
 		if ((error = namei(ndp)) != 0)
 			return (error);
 		vp = ndp->ni_vp;
+		if ((fmode & O_NAMEDATTR) != 0 && (vp->v_mount->mnt_flag &
+		     MNT_NAMEDATTR) == 0) {
+			error = EINVAL;
+			goto bad;
+		}
 	}
 	error = vn_open_vnode(vp, fmode, cred, curthread, fp);
 	if (first_open) {
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 143824558fc2..08aa26fbce02 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -140,6 +140,7 @@ typedef	__pid_t		pid_t;
 #define	O_DSYNC		0x01000000	/* POSIX data sync */
 #if __BSD_VISIBLE
 #define	O_EMPTY_PATH	0x02000000
+#define	O_NAMEDATTR	0x04000000	/* NFSv4 named attributes */
 #endif
 
 /*
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 6715c55d7a6d..66822a5aadc2 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -365,6 +365,7 @@ struct mntoptnames {
 	{ MNT_RELOAD,		"reload" },				\
 	{ MNT_FORCE,		"force" },				\
 	{ MNT_SNAPSHOT,		"snapshot" },				\
+	{ MNT_NAMEDATTR,	"named attributes" },			\
 	{ 0, NULL }
 #endif
 
@@ -390,6 +391,7 @@ struct mntoptnames {
 #define	MNT_SUJ		0x0000000100000000ULL /* using journaled soft updates */
 #define	MNT_AUTOMOUNTED	0x0000000200000000ULL /* mounted by automountd(8) */
 #define	MNT_UNTRUSTED	0x0000000800000000ULL /* filesys metadata untrusted */
+#define	MNT_NAMEDATTR	0x0000020000000000ULL /* named attributes enabled */
 
 /*
  * NFS export related mount flags.
@@ -429,7 +431,7 @@ struct mntoptnames {
 			MNT_IGNORE	| MNT_EXPUBLIC	| MNT_NOSYMFOLLOW | \
 			MNT_GJOURNAL	| MNT_MULTILABEL | MNT_ACLS	| \
 			MNT_NFS4ACLS	| MNT_AUTOMOUNTED | MNT_VERIFIED | \
-			MNT_UNTRUSTED)
+			MNT_UNTRUSTED	| MNT_NAMEDATTR)
 
 /* Mask of flags that can be updated. */
 #define	MNT_UPDATEMASK (MNT_NOSUID	| MNT_NOEXEC	| \
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index 1416ff983f32..20c4f4c34dc0 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -172,14 +172,15 @@ int	cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
 #define	OPENREAD	0x00200000 /* open for reading */
 #define	OPENWRITE	0x00400000 /* open for writing */
 #define	WANTIOCTLCAPS	0x00800000 /* leave ioctl caps for the caller */
-/* UNUSED		0x01000000 */
+#define	OPENNAMED	0x01000000 /* opening a named attribute (dir) */
 #define	NOEXECCHECK	0x02000000 /* do not perform exec check on dir */
 #define	MAKEENTRY	0x04000000 /* entry is to be added to name cache */
 #define	ISSYMLINK	0x08000000 /* symlink needs interpretation */
 #define	ISLASTCN	0x10000000 /* this is last component of pathname */
 #define	ISDOTDOT	0x20000000 /* current component name is .. */
 #define	TRAILINGSLASH	0x40000000 /* path ended in a slash */
-#define	PARAMASK	0x7ffffe00 /* mask of parameter descriptors */
+#define	CREATENAMED	0x80000000 /* create a named attribute dir */
+#define	PARAMASK	0xfffffe00 /* mask of parameter descriptors */
 
 /*
  * Flags which must not be passed in by callers.
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
index 096d3444f997..59cef241754a 100644
--- a/sys/sys/unistd.h
+++ b/sys/sys/unistd.h
@@ -154,6 +154,7 @@
 #define	_PC_MAC_PRESENT		63
 #define	_PC_ACL_NFS4		64
 #define	_PC_DEALLOC_PRESENT	65
+#define	_PC_NAMEDATTR_ENABLED	66
 #endif
 
 /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 00f8a1eabc4e..e1b30977f4bd 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -246,6 +246,8 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes");
 #define	VIRF_MOUNTPOINT	0x0004	/* This vnode is mounted on */
 #define	VIRF_TEXT_REF	0x0008	/* Executable mappings ref the vnode */
 #define	VIRF_CROSSMP	0x0010	/* Cross-mp vnode, no locking */
+#define	VIRF_NAMEDDIR	0x0020	/* Named attribute directory */
+#define	VIRF_NAMEDATTR	0x0040	/* Named attribute */
 
 #define	VI_UNUSED0	0x0001	/* unused */
 #define	VI_MOUNT	0x0002	/* Mount in progress */
@@ -305,9 +307,10 @@ struct vattr {
 /*
  * Flags for va_vaflags.
  */
-#define	VA_UTIMES_NULL	0x01		/* utimes argument was NULL */
-#define	VA_EXCLUSIVE	0x02		/* exclusive create request */
-#define	VA_SYNC		0x04		/* O_SYNC truncation */
+#define	VA_UTIMES_NULL		0x01	/* utimes argument was NULL */
+#define	VA_EXCLUSIVE		0x02	/* exclusive create request */
+#define	VA_SYNC			0x04	/* O_SYNC truncation */
+#define	VA_NAMEDATTR_TYPE	0x08	/* vnode is for named attribute */
 
 /*
  * Flags for ioflag. (high 16 bits used to ask for read-ahead and