git: bba7a2e89602 - main - kern_jail.c: Allow mountd/nfsd to optionally run in a jail

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Sat, 17 Dec 2022 21:45:27 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=bba7a2e89602e6745bb2ec474f5ab714aef49f42

commit bba7a2e89602e6745bb2ec474f5ab714aef49f42
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2022-12-17 21:43:49 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2022-12-17 21:43:49 +0000

    kern_jail.c: Allow mountd/nfsd to optionally run in a jail
    
    This patch adds "allow.nfsd" to the jail code based on a
    new kernel build option VNET_NFSD.  This will not work
    until future patches fix nmount(2) to allow mountd to
    run in a vnet prison and the NFS server code is patched
    so that global variables are in a vnet.
    
    The jail(8) man page will be patched in a future commit.
    
    Reviewed by:    jamie
    MFC after:      4 months
    Differential Revision:  https://reviews.freebsd.org/D37637
---
 sys/kern/kern_jail.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 sys/sys/jail.h       |  4 +++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 8e9cdadd94cd..705d90aecdf8 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
+#include "opt_nfs.h"
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -218,6 +219,9 @@ static struct bool_flags pr_flag_allow[NBBY * NBPW] = {
 	{"allow.unprivileged_proc_debug", "allow.nounprivileged_proc_debug",
 	 PR_ALLOW_UNPRIV_DEBUG},
 	{"allow.suser", "allow.nosuser", PR_ALLOW_SUSER},
+#if defined(VNET_NFSD) && defined(VIMAGE) && defined(NFSD)
+	{"allow.nfsd", "allow.nonfsd", PR_ALLOW_NFSD},
+#endif
 };
 static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC;
 const size_t pr_flag_allow_size = sizeof(pr_flag_allow);
@@ -2102,6 +2106,13 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 	}
 #endif
 
+#ifdef VNET_NFSD
+	if (born && pr != &prison0 && (pr->pr_allow & PR_ALLOW_NFSD) != 0 &&
+	    (pr->pr_root->v_vflag & VV_ROOT) == 0)
+		printf("Warning jail jid=%d: mountd/nfsd requires a separate"
+		   " file system\n", pr->pr_id);
+#endif
+
 	drflags &= ~PD_KILL;
 	td->td_retval[0] = pr->pr_id;
 
@@ -3463,6 +3474,27 @@ prison_check(struct ucred *cred1, struct ucred *cred2)
 	    prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH);
 }
 
+/*
+ * Decide whether mountd/nfsd may run in the prison of the given
+ * credential.  All of the following must hold:
+ * - The prison is a vnet prison.
+ * - PR_ALLOW_NFSD is set on the prison.
+ * - The prison's root directory (pr_root) is a file system mount
+ *   point, so that mountd can hang export information on it.
+ * Returns true only when every condition holds.
+ */
+bool
+prison_check_nfsd(struct ucred *cred)
+{
+
+	/* Non-vnet jails and jails lacking the permission never qualify. */
+	if (jailed_without_vnet(cred) ||
+	    !prison_allow(cred, PR_ALLOW_NFSD))
+		return (false);
+	/* The prison root must itself be a file system mount point. */
+	return ((cred->cr_prison->pr_root->v_vflag & VV_ROOT) != 0);
+}
+
 /*
  * Return 1 if p2 is a child of p1, otherwise 0.
  */
@@ -3717,11 +3749,20 @@ prison_priv_check(struct ucred *cred, int priv)
 	 * is only granted conditionally in the legacy jail case.
 	 */
 	switch (priv) {
-#ifdef notyet
 		/*
 		 * NFS-specific privileges.
 		 */
 	case PRIV_NFS_DAEMON:
+	case PRIV_VFS_GETFH:
+	case PRIV_VFS_MOUNT_EXPORTED:
+#ifdef VNET_NFSD
+		if (!prison_check_nfsd(cred))
+#else
+		printf("running nfsd in a prison requires a kernel "
+		    "built with ''options VNET_NFSD''\n");
+#endif
+			return (EPERM);
+#ifdef notyet
 	case PRIV_NFS_LOCKD:
 #endif
 		/*
@@ -4474,6 +4515,10 @@ SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Unprivileged processes may use process debugging facilities");
 SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Processes in jail with uid 0 have privilege");
+#if defined(VNET_NFSD) && defined(VIMAGE) && defined(NFSD)
+SYSCTL_JAIL_PARAM(_allow, nfsd, CTLTYPE_INT | CTLFLAG_RW,
+    "B", "Mountd/nfsd may run in the jail");
+#endif
 
 SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags");
 SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW,
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 743b413bc06d..c50c8607aa0a 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -253,7 +253,8 @@ struct prison_racct {
 #define	PR_ALLOW_SUSER			0x00000400
 #define	PR_ALLOW_RESERVED_PORTS		0x00008000
 #define	PR_ALLOW_KMEM_ACCESS		0x00010000	/* reserved, not used yet */
-#define	PR_ALLOW_ALL_STATIC		0x000187ff
+#define	PR_ALLOW_NFSD			0x00020000
+#define	PR_ALLOW_ALL_STATIC		0x000387ff
 
 /*
  * PR_ALLOW_DIFFERENCES determines which flags are able to be
@@ -420,6 +421,7 @@ void getjailname(struct ucred *cred, char *name, size_t len);
 void prison0_init(void);
 int prison_allow(struct ucred *, unsigned);
 int prison_check(struct ucred *cred1, struct ucred *cred2);
+bool prison_check_nfsd(struct ucred *cred);
 int prison_owns_vnet(struct ucred *);
 int prison_canseemount(struct ucred *cred, struct mount *mp);
 void prison_enforce_statfs(struct ucred *cred, struct mount *mp,