git: fcaa890c4469 - main - mbuf: Only allow extpg mbufs if the system has a direct map

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Tue, 16 Nov 2021 18:52:43 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=fcaa890c4469118255d463495b4044eef484fa3e

commit fcaa890c4469118255d463495b4044eef484fa3e
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2021-11-16 18:31:04 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2021-11-16 18:31:04 +0000

    mbuf: Only allow extpg mbufs if the system has a direct map
    
    Some upcoming changes will modify software checksum routines like
    in_cksum() to operate using m_apply(), which uses the direct map to
    access packet data for unmapped mbufs.  This approach of course does not
    work on platforms without a direct map, so we have to disallow the use
    of unmapped mbufs on such platforms.
    
    I believe this is the right tradeoff: we only configure KTLS on amd64
    and arm64 today (and one KTLS consumer, NFS TLS, requires a direct map
    already), and the use of unmapped mbufs with plain sendfile is a recent
    optimization.  If need be, m_apply() could be modified to create
    CPU-private mappings of extpg mbuf pages as a fallback.
    
    So, change mb_use_ext_pgs to be hard-wired to zero on systems without a
    direct map.  Note that PMAP_HAS_DMAP is not a compile-time constant on
    some systems, so the default value of mb_use_ext_pgs has to be
    determined during boot.
    
    Reviewed by:    jhb
    Discussed with: gallatin
    MFC after:      2 weeks
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D32940
---
 sys/kern/kern_mbuf.c             | 32 ++++++++++++++++++++++++++++++--
 sys/rpc/rpcsec_tls/rpctls_impl.c |  2 +-
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index d1f2fd2bd9e4..78a270189a4b 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -116,9 +116,26 @@ int nmbjumbop;			/* limits number of page size jumbo clusters */
 int nmbjumbo9;			/* limits number of 9k jumbo clusters */
 int nmbjumbo16;			/* limits number of 16k jumbo clusters */
 
-bool mb_use_ext_pgs = true;	/* use M_EXTPG mbufs for sendfile & TLS */
-SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,
+bool mb_use_ext_pgs = false;	/* use M_EXTPG mbufs for sendfile & TLS */
+
+static int
+sysctl_mb_use_ext_pgs(SYSCTL_HANDLER_ARGS)
+{
+	int error, extpg;
+
+	extpg = mb_use_ext_pgs;
+	error = sysctl_handle_int(oidp, &extpg, 0, req);
+	if (error == 0 && req->newptr != NULL) {
+		if (extpg != 0 && !PMAP_HAS_DMAP)
+			error = EOPNOTSUPP;
+		else
+			mb_use_ext_pgs = extpg != 0;
+	}
+	return (error);
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLTYPE_INT | CTLFLAG_RW,
     &mb_use_ext_pgs, 0,
+    sysctl_mb_use_ext_pgs, "IU",
     "Use unmapped mbufs for sendfile(2) and TLS offload");
 
 static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */
@@ -137,6 +154,7 @@ static void
 tunable_mbinit(void *dummy)
 {
 	quad_t realmem;
+	int extpg;
 
 	/*
 	 * The default limit for all mbuf related memory is 1/2 of all
@@ -173,6 +191,16 @@ tunable_mbinit(void *dummy)
 	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
 		nmbufs = lmax(maxmbufmem / MSIZE / 5,
 		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
+
+	/*
+	 * Unmapped mbufs can only safely be used on platforms with a direct
+	 * map.
+	 */
+	if (PMAP_HAS_DMAP) {
+		extpg = mb_use_ext_pgs;
+		TUNABLE_INT_FETCH("kern.ipc.mb_use_ext_pgs", &extpg);
+		mb_use_ext_pgs = extpg != 0;
+	}
 }
 SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
 
diff --git a/sys/rpc/rpcsec_tls/rpctls_impl.c b/sys/rpc/rpcsec_tls/rpctls_impl.c
index 110ba107540a..c495213b08e2 100644
--- a/sys/rpc/rpcsec_tls/rpctls_impl.c
+++ b/sys/rpc/rpcsec_tls/rpctls_impl.c
@@ -711,7 +711,7 @@ rpctls_getinfo(u_int *maxlenp, bool rpctlscd_run, bool rpctlssd_run)
 	int error;
 	size_t siz;
 
-	if (PMAP_HAS_DMAP == 0 || !mb_use_ext_pgs)
+	if (!mb_use_ext_pgs)
 		return (false);
 	siz = sizeof(enable);
 	error = kernel_sysctlbyname(curthread, "kern.ipc.tls.enable",