git: 7926a01ed7ae - main - vfs_export: Add checks for correct prison when updating exports

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Fri, 03 Feb 2023 00:22:32 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=7926a01ed7ae7cefd81ef4cc2142c35b84d81913

commit 7926a01ed7ae7cefd81ef4cc2142c35b84d81913
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2023-02-03 00:20:58 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2023-02-03 00:20:58 +0000

    vfs_export: Add checks for correct prison when updating exports
    
    mountd(8) basically does the following:
    getmntinfo()
    for each mount
          delete_exports
    using nmount(2) to do the creation/deletion of individual exports.
    
    For prison0 (and for other prisons if enforce_statfs == 0) getmntinfo()
    returns all mount points, including ones being used within other prisons.
    This can cause confusion if the same file system is specified in the
    exports(5) file for multiple prisons.
    
    This patch adds a permanent identifier to each prison
    and marks which prison did the exports in a field of
    the mount structure called mnt_exjail.  This field can
    then be compared to the permanent identifier for the
    prison that the thread's credentials are in.
    Also required was a new function called prison_isalive_permid()
    which returns whether the prison is alive, so that the check can be
    ignored for prisons that have been removed.
    
    This prepares the system to allow mountd(8) to run in multiple
    prisons, including prison0.
    
    Future commits will complete the modifications to allow mountd(8)
    to run in vnet prisons.  Until then, these changes should not affect
    semantics.
    
    Reviewed by:    markj
    MFC after:      3 months
    Differential Revision:  https://reviews.freebsd.org/D38144
---
 sys/kern/kern_jail.c  | 32 +++++++++++++++++++++++++++++++
 sys/kern/vfs_export.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-----
 sys/sys/jail.h        |  2 ++
 sys/sys/mount.h       |  5 ++++-
 4 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 4c1e3ff40c58..293dd53d65c9 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -118,6 +118,7 @@ struct prison prison0 = {
 	.pr_flags	= PR_HOST|_PR_IP_SADDRSEL,
 #endif
 	.pr_allow	= PR_ALLOW_ALL_STATIC,
+	.pr_permid	= 1,
 };
 MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
 
@@ -988,6 +989,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 	uint64_t pr_allow_diff;
 	unsigned tallow;
 	char numbuf[12];
+	static uint64_t init_permid = 2;
 
 	error = priv_check(td, PRIV_JAIL_SET);
 	if (!error && (flags & JAIL_ATTACH))
@@ -1617,6 +1619,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 
 		pr->pr_id = jid;
+		pr->pr_permid = init_permid++;
 		if (inspr != NULL)
 			TAILQ_INSERT_BEFORE(inspr, pr, pr_list);
 		else
@@ -3535,6 +3538,35 @@ prison_isalive(const struct prison *pr)
 	return (true);
 }
 
+/*
+ * Return true if the prison is currently alive.  Identified by pr_permid.
+ */
+bool
+prison_isalive_permid(const uint64_t prison_permid)
+{
+	struct prison *pr;
+	bool alive;
+
+	/*
+	 * permid == 0 --> never assigned to a prison
+	 * permid == 1 --> assigned to prison0, always alive
+	 */
+	if (prison_permid == 0)
+		return (false);
+	else if (prison_permid == 1)
+		return (true);
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		if (pr->pr_permid == prison_permid) {
+			alive = prison_isalive(pr);
+			sx_unlock(&allprison_lock);
+			return (alive);
+		}
+	}
+	sx_unlock(&allprison_lock);
+	return (false);
+}
+
 /*
  * Return true if the prison is currently valid.  A prison is valid if it has
  * been fully created, and is not being destroyed.  Note that dying prisons
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 024011de4c89..024adfe152d6 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
+#include <sys/proc.h>
 #include <sys/rmlock.h>
 #include <sys/refcount.h>
 #include <sys/signalvar.h>
@@ -70,7 +71,6 @@ static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure");
 static struct radix_node_head *vfs_create_addrlist_af(
 		    struct radix_node_head **prnh, int off);
 #endif
-static void	vfs_free_addrlist(struct netexport *nep);
 static int	vfs_free_netcred(struct radix_node *rn, void *w);
 static void	vfs_free_addrlist_af(struct radix_node_head **prnh);
 static int	vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
@@ -274,7 +274,7 @@ vfs_free_addrlist_af(struct radix_node_head **prnh)
 /*
  * Free the net address hash lists that are hanging off the mount points.
  */
-static void
+void
 vfs_free_addrlist(struct netexport *nep)
 {
 	struct ucred *cred;
@@ -285,8 +285,10 @@ vfs_free_addrlist(struct netexport *nep)
 		vfs_free_addrlist_af(&nep->ne6);
 
 	cred = nep->ne_defexported.netc_anon;
-	if (cred != NULL)
+	if (cred != NULL) {
 		crfree(cred);
+		nep->ne_defexported.netc_anon = NULL;
+	}
 
 }
 
@@ -301,6 +303,8 @@ vfs_export(struct mount *mp, struct export_args *argp)
 {
 	struct netexport *nep;
 	int error;
+	uint64_t jail_permid;
+	bool new_nep, prison_alive;
 
 	if ((argp->ex_flags & (MNT_DELEXPORT | MNT_EXPORTED)) == 0)
 		return (EINVAL);
@@ -311,13 +315,29 @@ vfs_export(struct mount *mp, struct export_args *argp)
 		return (EINVAL);
 
 	error = 0;
+	jail_permid = curthread->td_ucred->cr_prison->pr_permid;
 	lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
 	nep = mp->mnt_export;
+	prison_alive = prison_isalive_permid(mp->mnt_exjail);
 	if (argp->ex_flags & MNT_DELEXPORT) {
 		if (nep == NULL) {
+			KASSERT(mp->mnt_exjail == 0,
+			    ("vfs_export: mnt_exjail delexport not 0"));
 			error = ENOENT;
 			goto out;
 		}
+		KASSERT(mp->mnt_exjail != 0,
+		    ("vfs_export: mnt_exjail delexport 0"));
+		if (jail_permid == 1 && mp->mnt_exjail != jail_permid &&
+		    prison_alive) {
+			/* EXDEV will not get logged by mountd(8). */
+			error = EXDEV;
+			goto out;
+		} else if (mp->mnt_exjail != jail_permid && prison_alive) {
+			/* EPERM will get logged by mountd(8). */
+			error = EPERM;
+			goto out;
+		}
 		if (mp->mnt_flag & MNT_EXPUBLIC) {
 			vfs_setpublicfs(NULL, NULL, NULL);
 			MNT_ILOCK(mp);
@@ -326,20 +346,37 @@ vfs_export(struct mount *mp, struct export_args *argp)
 		}
 		vfs_free_addrlist(nep);
 		mp->mnt_export = NULL;
+		mp->mnt_exjail = 0;
 		free(nep, M_MOUNT);
 		nep = NULL;
 		MNT_ILOCK(mp);
 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 		MNT_IUNLOCK(mp);
 	}
+	new_nep = false;
 	if (argp->ex_flags & MNT_EXPORTED) {
 		if (nep == NULL) {
+			KASSERT(mp->mnt_exjail == 0,
+			    ("vfs_export: mnt_exjail not 0"));
 			nep = malloc(sizeof(struct netexport), M_MOUNT, M_WAITOK | M_ZERO);
 			mp->mnt_export = nep;
+			new_nep = true;
+		} else if (mp->mnt_exjail != jail_permid && prison_alive) {
+			KASSERT(mp->mnt_exjail != 0,
+			    ("vfs_export: mnt_exjail 0"));
+			error = EPERM;
+			goto out;
 		}
 		if (argp->ex_flags & MNT_EXPUBLIC) {
-			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
+			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) {
+				if (new_nep) {
+					mp->mnt_export = NULL;
+					free(nep, M_MOUNT);
+				}
 				goto out;
+			}
+			new_nep = false;
+			mp->mnt_exjail = jail_permid;
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_EXPUBLIC;
 			MNT_IUNLOCK(mp);
@@ -348,8 +385,14 @@ vfs_export(struct mount *mp, struct export_args *argp)
 			argp->ex_numsecflavors = 1;
 			argp->ex_secflavors[0] = AUTH_SYS;
 		}
-		if ((error = vfs_hang_addrlist(mp, nep, argp)))
+		if ((error = vfs_hang_addrlist(mp, nep, argp))) {
+			if (new_nep) {
+				mp->mnt_export = NULL;
+				free(nep, M_MOUNT);
+			}
 			goto out;
+		}
+		mp->mnt_exjail = jail_permid;
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_EXPORTED;
 		MNT_IUNLOCK(mp);
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index c50c8607aa0a..2031c698ff3d 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -199,6 +199,7 @@ struct prison {
 	int		 pr_spare[2];
 	int		 pr_osreldate;			/* (c) kern.osreldate value */
 	unsigned long	 pr_hostid;			/* (p) jail hostid */
+	uint64_t	 pr_permid;			/* (c) permanent jail id */
 	char		 pr_name[MAXHOSTNAMELEN];	/* (p) admin jail name */
 	char		 pr_path[MAXPATHLEN];		/* (c) chroot path */
 	char		 pr_hostname[MAXHOSTNAMELEN];	/* (p) jail hostname */
@@ -442,6 +443,7 @@ void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void
 void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
 int prison_ischild(struct prison *, struct prison *);
 bool prison_isalive(const struct prison *);
+bool prison_isalive_permid(const uint64_t prison_permid);
 bool prison_isvalid(struct prison *);
 #if defined(INET) || defined(INET6)
 int prison_ip_check(const struct prison *, const pr_family_t, const void *);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 4bfc77b7f1a1..42247829d42e 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -216,6 +216,7 @@ struct mount_upper_node {
  *	i - interlock
  *	v - vnode freelist mutex
  *	d - deferred unmount list mutex
+ *	e - mnt_explock
  *
  * Unmarked fields are considered stable as long as a ref is held.
  *
@@ -245,13 +246,14 @@ struct mount {
 	void *		mnt_data;		/* private data */
 	time_t		mnt_time;		/* last time written*/
 	int		mnt_iosize_max;		/* max size for clusters, etc */
-	struct netexport *mnt_export;		/* export list */
+	struct netexport *mnt_export;		/* (e) export list */
 	struct label	*mnt_label;		/* MAC label for the fs */
 	u_int		mnt_hashseed;		/* Random seed for vfs_hash */
 	int		mnt_lockref;		/* (i) Lock reference count */
 	int		mnt_secondary_writes;   /* (i) # of secondary writes */
 	int		mnt_secondary_accwrites;/* (i) secondary wr. starts */
 	struct thread	*mnt_susp_owner;	/* (i) thread owning suspension */
+	uint64_t	mnt_exjail;		/* (e) exported in jail ident */
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct mtx	mnt_listmtx;
@@ -1017,6 +1019,7 @@ void	vfs_periodic(struct mount *, int);
 int	vfs_busy(struct mount *, int);
 int	vfs_export			 /* process mount export info */
 	    (struct mount *, struct export_args *);
+void	vfs_free_addrlist(struct netexport *);
 void	vfs_allocate_syncvnode(struct mount *);
 void	vfs_deallocate_syncvnode(struct mount *);
 int	vfs_donmount(struct thread *td, uint64_t fsflags,