git: 2ec2ba7e232d - main - vfs: Add VFS/syscall support for Solaris style extended attributes
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 02 Apr 2025 20:47:22 UTC
The branch main has been updated by rmacklem: URL: https://cgit.FreeBSD.org/src/commit/?id=2ec2ba7e232dd126df0617194fd07be78c7a2ab9 commit 2ec2ba7e232dd126df0617194fd07be78c7a2ab9 Author: Rick Macklem <rmacklem@FreeBSD.org> AuthorDate: 2025-04-02 20:45:14 +0000 Commit: Rick Macklem <rmacklem@FreeBSD.org> CommitDate: 2025-04-02 20:45:14 +0000 vfs: Add VFS/syscall support for Solaris style extended attributes Some systems, such as Solaris, represent extended attributes as a set of files in a directory associated with a file object. This allows extended attributes to be acquired/modified via regular file system operations, such as read(2), write(2), lseek(2) and ftruncate(2). Since ZFS already has the capability to do this, this patch gives system calls (and the NFSv4 client/server) such access to extended attributes. This permits handling of large extended attributes and allows the NFSv4 server to provide the service to NFSv4 clients that want it, such as Windows, MacOS and Solaris. The top level syscall change is a new open(2)/openat(2) flag I called O_NAMEDATTR that allows the named attribute directory or any attribute within that directory to be opened. The patch defines two new v_irflag flags called VIRF_NAMEDDIR and VIRF_NAMEDATTR to indicate that the vnode is for this alternate name space and not a normal file object. The patch also defines flags (OPENNAMED and CREATENAMED) for VOP_LOOKUP() to pass this new case down into VOP_LOOKUP() and MNT_NAMEDATTR for file systems that support named attributes. Most of the code in this patch is to avoid creation of links, symlinks or non-regular file objects in the named attribute directory. It also must avoid using the name cache, since the named attribute directory is associated with the same name as the file object. Man page updates will be done as separate commits. 
Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D49583 --- sys/kern/vfs_cache.c | 6 +++++- sys/kern/vfs_default.c | 1 + sys/kern/vfs_lookup.c | 1 + sys/kern/vfs_subr.c | 1 + sys/kern/vfs_syscalls.c | 42 ++++++++++++++++++++++++++++++++++++++++++ sys/kern/vfs_vnops.c | 39 ++++++++++++++++++++++++++++++++++++++- sys/sys/fcntl.h | 1 + sys/sys/mount.h | 4 +++- sys/sys/namei.h | 5 +++-- sys/sys/unistd.h | 1 + sys/sys/vnode.h | 9 ++++++--- 11 files changed, 102 insertions(+), 8 deletions(-) diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index c55e103a1a37..e96724b928a6 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -4461,7 +4461,7 @@ cache_fpl_terminated(struct cache_fpl *fpl) (NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \ FAILIFEXISTS | FOLLOW | EMPTYPATH | LOCKSHARED | ISRESTARTED | WILLBEDIR | \ ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK | OPENREAD | \ - OPENWRITE | WANTIOCTLCAPS) + OPENWRITE | WANTIOCTLCAPS | OPENNAMED) #define CACHE_FPL_INTERNAL_CN_FLAGS \ (ISDOTDOT | MAKEENTRY | ISLASTCN) @@ -4525,6 +4525,10 @@ cache_can_fplookup(struct cache_fpl *fpl) cache_fpl_aborted_early(fpl); return (false); } + if ((cnp->cn_flags & OPENNAMED) != 0) { + cache_fpl_aborted_early(fpl); + return (false); + } return (true); } diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 5e6516921002..9f150570945b 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -451,6 +451,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap) case _PC_DEALLOC_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: + case _PC_NAMEDATTR_ENABLED: *ap->a_retval = 0; return (0); default: diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 290834c10be5..127d068f1fff 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -381,6 +381,7 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) error = fgetvp_lookup(ndp, dpp); } if (error == 0 && 
(*dpp)->v_type != VDIR && + (cnp->cn_flags & OPENNAMED) == 0 && (cnp->cn_pnbuf[0] != '\0' || (cnp->cn_flags & EMPTYPATH) == 0)) error = ENOTDIR; diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 96e8bb765972..c8ccbc18a7fe 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -4774,6 +4774,7 @@ DB_SHOW_COMMAND(mount, db_show_mount) MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); + MNT_FLAG(MNT_NAMEDATTR); #undef MNT_FLAG if (mflags != 0) { if (buf[0] != '\0') diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index a6c216d61d8c..7096c4b34d5b 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1435,6 +1435,10 @@ restart: vput(nd.ni_dvp); vrele(vp); return (EEXIST); + } else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { + NDFREE_PNBUF(&nd); + vput(nd.ni_dvp); + return (EINVAL); } else { VATTR_NULL(&vattr); vattr.va_mode = (mode & ALLPERMS) & @@ -1543,6 +1547,11 @@ restart: vrele(nd.ni_vp); return (EEXIST); } + if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { + NDFREE_PNBUF(&nd); + vput(nd.ni_dvp); + return (EINVAL); + } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE_PNBUF(&nd); vput(nd.ni_dvp); @@ -1688,6 +1697,10 @@ kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, vrele(vp); return (EPERM); /* POSIX */ } + if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) { + vrele(vp); + return (EINVAL); + } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd, &cap_linkat_target_rights); @@ -1829,6 +1842,10 @@ restart: goto out; goto restart; } + if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { + error = EINVAL; + goto out; + } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; #ifdef MAC @@ -3721,6 +3738,7 @@ kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, struct nameidata fromnd, tond; uint64_t tondflags; int error; + short irflag; again: 
bwillwrite(); @@ -3773,6 +3791,12 @@ again: return (error); goto again; } + irflag = vn_irflag_read(fvp); + if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || + (irflag & VIRF_NAMEDDIR) != 0) { + error = EINVAL; + goto out; + } if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; @@ -3892,6 +3916,10 @@ restart: return (error); goto restart; } + if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { + error = EINVAL; + goto out; + } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; @@ -4629,6 +4657,7 @@ kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) struct file *fp; int fmode, error; int indx; + bool named_attr; error = priv_check(td, PRIV_VFS_FHOPEN); if (error != 0) @@ -4651,6 +4680,19 @@ kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) if (error != 0) return (error); + /* + * Check to see if the file handle refers to a named attribute + * directory or attribute. If it does, the O_NAMEDATTR flag + * must have been specified. 
+ */ + named_attr = (vn_irflag_read(vp) & + (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; + if ((named_attr && (fmode & O_NAMEDATTR) == 0) || + (!named_attr && (fmode & O_NAMEDATTR) != 0)) { + vput(vp); + return (ENOATTR); + } + error = falloc_noinstall(td, &fp); if (error != 0) { vput(vp); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 69d8ffcdd5ad..c448d62e9920 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -208,6 +208,11 @@ open2nameif(int fmode, u_int vn_open_flags) res |= OPENREAD; if ((fmode & FWRITE) != 0) res |= OPENWRITE; + if ((fmode & O_NAMEDATTR) != 0) { + res |= OPENNAMED; + if ((fmode & O_CREAT) != 0) + res |= CREATENAMED; + } if ((vn_open_flags & VN_OPEN_NOAUDIT) == 0) res |= AUDITVNODE1; if ((vn_open_flags & VN_OPEN_NOCAPCHECK) != 0) @@ -261,6 +266,19 @@ restart: if ((error = namei(ndp)) != 0) return (error); if (ndp->ni_vp == NULL) { + if ((fmode & O_NAMEDATTR) != 0) { + if ((ndp->ni_dvp->v_mount->mnt_flag & + MNT_NAMEDATTR) == 0) + error = EINVAL; + else if ((vn_irflag_read(ndp->ni_dvp) & + VIRF_NAMEDDIR) == 0) + error = ENOENT; + if (error != 0) { + vp = ndp->ni_dvp; + ndp->ni_dvp = NULL; + goto bad; + } + } VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; @@ -315,7 +333,21 @@ restart: error = EEXIST; goto bad; } - if (vp->v_type == VDIR) { + if ((fmode & O_NAMEDATTR) != 0) { + short irflag; + + irflag = vn_irflag_read(vp); + if ((vp->v_mount->mnt_flag & + MNT_NAMEDATTR) == 0 || + ((irflag & VIRF_NAMEDATTR) != 0 && + vp->v_type != VREG)) + error = EINVAL; + else if ((irflag & (VIRF_NAMEDDIR | + VIRF_NAMEDATTR)) == 0) + error = ENOATTR; + if (error != 0) + goto bad; + } else if (vp->v_type == VDIR) { error = EISDIR; goto bad; } @@ -331,6 +363,11 @@ restart: if ((error = namei(ndp)) != 0) return (error); vp = ndp->ni_vp; + if ((fmode & O_NAMEDATTR) != 0 && (vp->v_mount->mnt_flag & + MNT_NAMEDATTR) == 0) { + error = EINVAL; + goto bad; + } } error = vn_open_vnode(vp, fmode, cred, curthread, fp); if (first_open) 
{ diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 143824558fc2..08aa26fbce02 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -140,6 +140,7 @@ typedef __pid_t pid_t; #define O_DSYNC 0x01000000 /* POSIX data sync */ #if __BSD_VISIBLE #define O_EMPTY_PATH 0x02000000 +#define O_NAMEDATTR 0x04000000 /* NFSv4 named attributes */ #endif /* diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 6715c55d7a6d..66822a5aadc2 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -365,6 +365,7 @@ struct mntoptnames { { MNT_RELOAD, "reload" }, \ { MNT_FORCE, "force" }, \ { MNT_SNAPSHOT, "snapshot" }, \ + { MNT_NAMEDATTR, "named attributes" }, \ { 0, NULL } #endif @@ -390,6 +391,7 @@ struct mntoptnames { #define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */ #define MNT_AUTOMOUNTED 0x0000000200000000ULL /* mounted by automountd(8) */ #define MNT_UNTRUSTED 0x0000000800000000ULL /* filesys metadata untrusted */ +#define MNT_NAMEDATTR 0x0000020000000000ULL /* named attributes enabled */ /* * NFS export related mount flags. @@ -429,7 +431,7 @@ struct mntoptnames { MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \ MNT_NFS4ACLS | MNT_AUTOMOUNTED | MNT_VERIFIED | \ - MNT_UNTRUSTED) + MNT_UNTRUSTED | MNT_NAMEDATTR) /* Mask of flags that can be updated. 
*/ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ diff --git a/sys/sys/namei.h b/sys/sys/namei.h index 1416ff983f32..20c4f4c34dc0 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -172,14 +172,15 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, #define OPENREAD 0x00200000 /* open for reading */ #define OPENWRITE 0x00400000 /* open for writing */ #define WANTIOCTLCAPS 0x00800000 /* leave ioctl caps for the caller */ -/* UNUSED 0x01000000 */ +#define OPENNAMED 0x01000000 /* opening a named attribute (dir) */ #define NOEXECCHECK 0x02000000 /* do not perform exec check on dir */ #define MAKEENTRY 0x04000000 /* entry is to be added to name cache */ #define ISSYMLINK 0x08000000 /* symlink needs interpretation */ #define ISLASTCN 0x10000000 /* this is last component of pathname */ #define ISDOTDOT 0x20000000 /* current component name is .. */ #define TRAILINGSLASH 0x40000000 /* path ended in a slash */ -#define PARAMASK 0x7ffffe00 /* mask of parameter descriptors */ +#define CREATENAMED 0x80000000 /* create a named attribute dir */ +#define PARAMASK 0xfffffe00 /* mask of parameter descriptors */ /* * Flags which must not be passed in by callers. diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index 096d3444f997..59cef241754a 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -154,6 +154,7 @@ #define _PC_MAC_PRESENT 63 #define _PC_ACL_NFS4 64 #define _PC_DEALLOC_PRESENT 65 +#define _PC_NAMEDATTR_ENABLED 66 #endif /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. 
*/ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 00f8a1eabc4e..e1b30977f4bd 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -246,6 +246,8 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes"); #define VIRF_MOUNTPOINT 0x0004 /* This vnode is mounted on */ #define VIRF_TEXT_REF 0x0008 /* Executable mappings ref the vnode */ #define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */ +#define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */ +#define VIRF_NAMEDATTR 0x0040 /* Named attribute */ #define VI_UNUSED0 0x0001 /* unused */ #define VI_MOUNT 0x0002 /* Mount in progress */ @@ -305,9 +307,10 @@ struct vattr { /* * Flags for va_vaflags. */ -#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ -#define VA_EXCLUSIVE 0x02 /* exclusive create request */ -#define VA_SYNC 0x04 /* O_SYNC truncation */ +#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ +#define VA_EXCLUSIVE 0x02 /* exclusive create request */ +#define VA_SYNC 0x04 /* O_SYNC truncation */ +#define VA_NAMEDATTR_TYPE 0x08 /* vnode is for named attribute */ /* * Flags for ioflag. (high 16 bits used to ask for read-ahead and