git: 110113bc086f - main - sysctl(9): Enable vnet sysctl variables to be loader tunable

From: Zhenlei Huang <zlei_at_FreeBSD.org>
Date: Sat, 09 Sep 2023 08:21:15 UTC
The branch main has been updated by zlei:

URL: https://cgit.FreeBSD.org/src/commit/?id=110113bc086f5df1a9b6547edb1ab0cec698c55c

commit 110113bc086f5df1a9b6547edb1ab0cec698c55c
Author:     Zhenlei Huang <zlei@FreeBSD.org>
AuthorDate: 2023-09-09 08:06:23 +0000
Commit:     Zhenlei Huang <zlei@FreeBSD.org>
CommitDate: 2023-09-09 08:06:23 +0000

    sysctl(9): Enable vnet sysctl variables to be loader tunable
    
    Complete phase two of 3da1cf1e88f8.
    
    In 3da1cf1e88f8, the meaning of the CTLFLAG_TUN flag was extended to
    automatically check whether there is a kernel environment variable
    which shall initialize the SYSCTL during early boot. It works for all
    SYSCTL types, both statically and dynamically created ones, except for
    the SYSCTLs which belong to VNETs.
    
    This change extends the meaning further, so that it also works for
    the SYSCTLs which belong to VNETs. A typical usage is:
    ```
    VNET_DEFINE_STATIC(int, foo) = 0;
    SYSCTL_INT(_net, OID_AUTO, foo, CTLFLAG_RWTUN | CTLFLAG_VNET,
        &VNET_NAME(foo), 0, "Description of the foo loader tunable");
    ```
    
    Note that the implementation has a limitation. It behaves the same way
    as non-vnet loader tunables do. That is, after the kernel or modules
    have been initialized, any changes (e.g. via kenv) to the kernel
    environment variable will not affect the corresponding vnet variable of
    subsequently created VNETs. To overcome this, we can use
    TUNABLE_XXX_FETCH to fetch the kernel environment variable into those
    vnet variables during vnet construction.
    
    This change will fix the following SYSCTLs that belong to VNETs and
    have the CTLFLAG_TUN flag:
    ```
    net.add_addr_allfibs
    net.bpf.optimize_writers
    net.inet.tcp.fastopen.ccache_buckets
    net.link.bridge.inherit_mac
    net.link.bridge.ipfw_arp
    net.link.bridge.log_stp
    net.link.bridge.pfil_bridge
    net.link.bridge.pfil_local_phys
    net.link.bridge.pfil_member
    net.link.bridge.pfil_onlyip
    net.link.lagg.default_use_flowid
    net.link.lagg.default_use_numa
    net.link.lagg.default_flowid_shift
    net.link.lagg.lacp.debug
    net.link.lagg.lacp.default_strict_mode
    ```
    
    Although the following vnet SYSCTLs have the CTLFLAG_TUN flag, their
    values are re-fetched via TUNABLE_XXX_FETCH, and thus they are not
    affected by this change.
    ```
    net.inet.ip.reass_hashsize
    net.inet.tcp.hostcache.cachelimit
    net.inet.tcp.hostcache.hashsize
    net.inet.tcp.hostcache.bucketlimit
    net.inet.tcp.syncache.bucketlimit
    net.inet.tcp.syncache.cachelimit
    net.inet.tcp.syncache.hashsize
    net.key.spdcache.maxentries
    net.key.spdcache.threshold
    ```
    
    In memoriam:    hselasky
    Discussed with: hselasky, glebius
    Fixes:          3da1cf1e88f8 Extend the meaning of the CTLFLAG_TUN flag ...
    MFC after:      2 weeks
    Relnotes:       yes
    Differential Revision:  https://reviews.freebsd.org/D39638
---
 sys/kern/kern_linker.c  |  3 +++
 sys/kern/kern_sysctl.c  |  3 ---
 sys/kern/link_elf.c     | 20 ++++++++++++++++++++
 sys/kern/link_elf_obj.c | 30 +++++++++++++++++++++++++++++-
 sys/kern/linker_if.m    |  9 +++++++++
 5 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c
index a1073512e856..2367bf35da76 100644
--- a/sys/kern/kern_linker.c
+++ b/sys/kern/kern_linker.c
@@ -471,6 +471,9 @@ linker_load_file(const char *filename, linker_file_t *result)
 			}
 			modules = !TAILQ_EMPTY(&lf->modules);
 			linker_file_register_sysctls(lf, false);
+#ifdef VIMAGE
+			LINKER_PROPAGATE_VNETS(lf);
+#endif
 			linker_file_sysinit(lf);
 			lf->flags |= LINKER_FILE_LINKED;
 
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 814579a80f5a..a1d502d58bff 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -510,9 +510,6 @@ sysctl_register_oid(struct sysctl_oid *oidp)
 	RB_INSERT(sysctl_oid_list, parent, oidp);
 
 	if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
-#ifdef VIMAGE
-	    (oidp->oid_kind & CTLFLAG_VNET) == 0 &&
-#endif
 	    (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
 	    (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
 		/* only fetch value once */
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 5be2db9d32bf..3e9998f27baa 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -160,6 +160,9 @@ static int	link_elf_each_function_nameval(linker_file_t,
 static void	link_elf_reloc_local(linker_file_t);
 static long	link_elf_symtab_get(linker_file_t, const Elf_Sym **);
 static long	link_elf_strtab_get(linker_file_t, caddr_t *);
+#ifdef VIMAGE
+static void	link_elf_propagate_vnets(linker_file_t);
+#endif
 static int	elf_lookup(linker_file_t, Elf_Size, int, Elf_Addr *);
 
 static kobj_method_t link_elf_methods[] = {
@@ -178,6 +181,9 @@ static kobj_method_t link_elf_methods[] = {
 	KOBJMETHOD(linker_ctf_get,		link_elf_ctf_get),
 	KOBJMETHOD(linker_symtab_get,		link_elf_symtab_get),
 	KOBJMETHOD(linker_strtab_get,		link_elf_strtab_get),
+#ifdef VIMAGE
+	KOBJMETHOD(linker_propagate_vnets,	link_elf_propagate_vnets),
+#endif
 	KOBJMETHOD_END
 };
 
@@ -1923,6 +1929,20 @@ link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
 	return (ef->ddbstrcnt);
 }
 
+#ifdef VIMAGE
+static void
+link_elf_propagate_vnets(linker_file_t lf)
+{
+	elf_file_t ef = (elf_file_t)lf;
+	int size;
+
+	if (ef->vnet_base != 0) {
+		size = (uintptr_t)ef->vnet_stop - (uintptr_t)ef->vnet_start;
+		vnet_data_copy((void *)ef->vnet_base, size);
+	}
+}
+#endif
+
 #if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) || defined(__powerpc__)
 /*
  * Use this lookup routine when performing relocations early during boot.
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index b853ca284f7d..768808d2102e 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -150,6 +150,9 @@ static int	link_elf_each_function_nameval(linker_file_t,
 static int	link_elf_reloc_local(linker_file_t, bool);
 static long	link_elf_symtab_get(linker_file_t, const Elf_Sym **);
 static long	link_elf_strtab_get(linker_file_t, caddr_t *);
+#ifdef VIMAGE
+static void	link_elf_propagate_vnets(linker_file_t);
+#endif
 
 static int	elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
 		    Elf_Addr *);
@@ -170,6 +173,9 @@ static kobj_method_t link_elf_methods[] = {
 	KOBJMETHOD(linker_ctf_get,		link_elf_ctf_get),
 	KOBJMETHOD(linker_symtab_get, 		link_elf_symtab_get),
 	KOBJMETHOD(linker_strtab_get, 		link_elf_strtab_get),
+#ifdef VIMAGE
+	KOBJMETHOD(linker_propagate_vnets,	link_elf_propagate_vnets),
+#endif
 	KOBJMETHOD_END
 };
 
@@ -1848,7 +1854,7 @@ link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
 		return (0);
 	return (ef->ddbsymcnt);
 }
-    
+
 static long
 link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
 {
@@ -1859,3 +1865,25 @@ link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
 		return (0);
 	return (ef->ddbstrcnt);
 }
+
+#ifdef VIMAGE
+static void
+link_elf_propagate_vnets(linker_file_t lf)
+{
+	elf_file_t ef = (elf_file_t) lf;
+
+	if (ef->progtab) {
+		for (int i = 0; i < ef->nprogtab; i++) {
+			if (ef->progtab[i].size == 0)
+				continue;
+			if (ef->progtab[i].name == NULL)
+				continue;
+			if (strcmp(ef->progtab[i].name, VNET_SETNAME) == 0) {
+				vnet_data_copy(ef->progtab[i].addr,
+				    ef->progtab[i].size);
+				break;
+			}
+		}
+	}
+}
+#endif
diff --git a/sys/kern/linker_if.m b/sys/kern/linker_if.m
index 0722390d4e20..a50ed1ea84a3 100644
--- a/sys/kern/linker_if.m
+++ b/sys/kern/linker_if.m
@@ -154,3 +154,12 @@ STATICMETHOD int link_preload {
 METHOD int link_preload_finish {
     linker_file_t	file;
 };
+
+#ifdef VIMAGE
+#
+# Propagate system tunable values to all vnets.
+#
+METHOD void propagate_vnets {
+	linker_file_t	file;
+};
+#endif