git: cf7974fd9e55 - main - sysctl: Update 'master' copy of vnet SYSCTLs on kernel environment variables change
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 21 Sep 2023 04:13:04 UTC
The branch main has been updated by zlei: URL: https://cgit.FreeBSD.org/src/commit/?id=cf7974fd9e554552989237c3d6bc736d672ac7c6 commit cf7974fd9e554552989237c3d6bc736d672ac7c6 Author: Zhenlei Huang <zlei@FreeBSD.org> AuthorDate: 2023-09-21 04:11:28 +0000 Commit: Zhenlei Huang <zlei@FreeBSD.org> CommitDate: 2023-09-21 04:11:28 +0000 sysctl: Update 'master' copy of vnet SYSCTLs on kernel environment variables change Complete phase three of 3da1cf1e88f8. With commit 110113bc086f, vnet sysctl variables can be loader tunable but the feature is limited. When the kernel modules have been initialized, any changes (e.g. via kenv) to kernel environment variables will not affect subsequently created VNETs. This change relaxes the limitation by listening on kernel environment variable's set / unset events, and then updating the 'master' copy of vnet SYSCTL or restoring it to its initial value. With this change, TUNABLE_XXX_FETCH can be greatly eliminated for vnet loader tunables. Reviewed by: glebius Fixes: 110113bc086f sysctl(9): Enable vnet sysctl variables to be loader tunable MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D41825 --- sys/kern/kern_environment.c | 3 ++ sys/kern/kern_sysctl.c | 107 +++++++++++++++++++++++++++++++++++++++++++- sys/kern/link_elf.c | 2 + sys/kern/link_elf_obj.c | 8 ++++ sys/net/vnet.c | 33 ++++++++++++++ sys/net/vnet.h | 6 +++ sys/sys/eventhandler.h | 5 +++ 7 files changed, 162 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c index 761734674bdf..a0967d044a96 100644 --- a/sys/kern/kern_environment.c +++ b/sys/kern/kern_environment.c @@ -38,6 +38,7 @@ #include <sys/cdefs.h> #include <sys/param.h> +#include <sys/eventhandler.h> #include <sys/systm.h> #include <sys/kenv.h> #include <sys/kernel.h> @@ -666,6 +667,7 @@ kern_setenv(const char *name, const char *value) kenvp[i + 1] = NULL; mtx_unlock(&kenv_lock); } + EVENTHANDLER_INVOKE(setenv, name); return (0); } @@ -689,6 +691,7 @@ 
kern_unsetenv(const char *name) kenvp[i] = NULL; mtx_unlock(&kenv_lock); zfree(oldenv, M_KENV); + EVENTHANDLER_INVOKE(unsetenv, name); return (0); } mtx_unlock(&kenv_lock); diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index a1d502d58bff..780eb6099b07 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -127,6 +127,7 @@ static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse); static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); +static int name2oid(char *, int *, int *, struct sysctl_oid **); static struct sysctl_oid * sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) @@ -512,8 +513,14 @@ sysctl_register_oid(struct sysctl_oid *oidp) if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && (oidp->oid_kind & CTLFLAG_TUN) != 0 && (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { - /* only fetch value once */ - oidp->oid_kind |= CTLFLAG_NOFETCH; +#ifdef VIMAGE + /* + * Can fetch value multiple times for VNET loader tunables. + * Only fetch once for non-VNET loader tunables. 
+ */ + if ((oidp->oid_kind & CTLFLAG_VNET) == 0) +#endif + oidp->oid_kind |= CTLFLAG_NOFETCH; /* try to fetch value from kernel environment */ sysctl_load_tunable_by_oid_locked(oidp); } @@ -969,6 +976,102 @@ sysctl_register_all(void *arg) } SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL); +#ifdef VIMAGE +static void +sysctl_setenv_vnet(void *arg __unused, char *name) +{ + struct sysctl_oid *oidp; + int oid[CTL_MAXNAME]; + int error, nlen; + + SYSCTL_WLOCK(); + error = name2oid(name, oid, &nlen, &oidp); + if (error) + goto out; + + if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && + (oidp->oid_kind & CTLFLAG_VNET) != 0 && + (oidp->oid_kind & CTLFLAG_TUN) != 0 && + (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { + /* Update value from kernel environment */ + sysctl_load_tunable_by_oid_locked(oidp); + } +out: + SYSCTL_WUNLOCK(); +} + +static void +sysctl_unsetenv_vnet(void *arg __unused, char *name) +{ + struct sysctl_oid *oidp; + int oid[CTL_MAXNAME]; + int error, nlen; + + SYSCTL_WLOCK(); + /* + * The setenv / unsetenv event handlers are invoked by kern_setenv() / + * kern_unsetenv() without exclusive locks. It is rare but still possible + * that the invoke order of event handlers is different from that of + * kern_setenv() and kern_unsetenv(). + * Re-check environment variable string to make sure it is unset. 
+ */ + if (testenv(name)) + goto out; + error = name2oid(name, oid, &nlen, &oidp); + if (error) + goto out; + + if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && + (oidp->oid_kind & CTLFLAG_VNET) != 0 && + (oidp->oid_kind & CTLFLAG_TUN) != 0 && + (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { + size_t size; + + switch (oidp->oid_kind & CTLTYPE) { + case CTLTYPE_INT: + case CTLTYPE_UINT: + size = sizeof(int); + break; + case CTLTYPE_LONG: + case CTLTYPE_ULONG: + size = sizeof(long); + break; + case CTLTYPE_S8: + case CTLTYPE_U8: + size = sizeof(int8_t); + break; + case CTLTYPE_S16: + case CTLTYPE_U16: + size = sizeof(int16_t); + break; + case CTLTYPE_S32: + case CTLTYPE_U32: + size = sizeof(int32_t); + break; + case CTLTYPE_S64: + case CTLTYPE_U64: + size = sizeof(int64_t); + break; + case CTLTYPE_STRING: + MPASS(oidp->oid_arg2 > 0); + size = oidp->oid_arg2; + break; + default: + goto out; + } + vnet_restore_init(oidp->oid_arg1, size); + } +out: + SYSCTL_WUNLOCK(); +} + +/* + * Register the kernel's setenv / unsetenv events. 
+ */ +EVENTHANDLER_DEFINE(setenv, sysctl_setenv_vnet, NULL, EVENTHANDLER_PRI_ANY); +EVENTHANDLER_DEFINE(unsetenv, sysctl_unsetenv_vnet, NULL, EVENTHANDLER_PRI_ANY); +#endif + /* * "Staff-functions" * diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index 568f1e1dbd95..eb7ce3828deb 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -506,6 +506,7 @@ link_elf_init(void* arg) TAILQ_INIT(&set_pcpu_list); #ifdef VIMAGE TAILQ_INIT(&set_vnet_list); + vnet_save_init((void *)VNET_START, VNET_STOP - VNET_START); #endif } @@ -767,6 +768,7 @@ parse_vnet(elf_file_t ef) return (ENOSPC); } memcpy((void *)ef->vnet_base, (void *)ef->vnet_start, size); + vnet_save_init((void *)ef->vnet_base, size); elf_set_add(&set_vnet_list, ef->vnet_start, ef->vnet_stop, ef->vnet_base); diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c index d4ad963e8181..0b2befc02c1a 100644 --- a/sys/kern/link_elf_obj.c +++ b/sys/kern/link_elf_obj.c @@ -547,6 +547,8 @@ link_elf_link_preload(linker_class_t cls, const char *filename, memcpy(vnet_data, ef->progtab[pb].addr, ef->progtab[pb].size); ef->progtab[pb].addr = vnet_data; + vnet_save_init(ef->progtab[pb].addr, + ef->progtab[pb].size); #endif } else if ((ef->progtab[pb].name != NULL && strcmp(ef->progtab[pb].name, ".ctors") == 0) || @@ -1120,6 +1122,12 @@ link_elf_load_file(linker_class_t cls, const char *filename, } else bzero(ef->progtab[pb].addr, shdr[i].sh_size); +#ifdef VIMAGE + if (ef->progtab[pb].addr != (void *)mapbase && + strcmp(ef->progtab[pb].name, VNET_SETNAME) == 0) + vnet_save_init(ef->progtab[pb].addr, + ef->progtab[pb].size); +#endif /* Update all symbol values with the offset. 
*/ for (j = 0; j < ef->ddbsymcnt; j++) { es = &ef->ddbsymtab[j]; diff --git a/sys/net/vnet.c b/sys/net/vnet.c index c4a623698341..ac937125a19d 100644 --- a/sys/net/vnet.c +++ b/sys/net/vnet.c @@ -178,6 +178,11 @@ static MALLOC_DEFINE(M_VNET_DATA, "vnet_data", "VNET data"); */ VNET_DEFINE_STATIC(char, modspace[VNET_MODMIN] __aligned(__alignof(void *))); +/* + * A copy of the initial values of all virtualized global variables. + */ +static uintptr_t vnet_init_var; + /* * Global lists of subsystem constructor and destructors for vnets. They are * registered via VNET_SYSINIT() and VNET_SYSUNINIT(). Both lists are @@ -356,6 +361,7 @@ vnet_data_startup(void *dummy __unused) df->vnd_len = VNET_MODMIN; TAILQ_INSERT_HEAD(&vnet_data_free_head, df, vnd_link); sx_init(&vnet_data_free_lock, "vnet_data alloc lock"); + vnet_init_var = (uintptr_t)malloc(VNET_BYTES, M_VNET_DATA, M_WAITOK); } SYSINIT(vnet_data, SI_SUB_KLD, SI_ORDER_FIRST, vnet_data_startup, NULL); @@ -473,6 +479,33 @@ vnet_data_copy(void *start, int size) VNET_LIST_RUNLOCK(); } +/* + * Save a copy of the initial values of virtualized global variables. + */ +void +vnet_save_init(void *start, size_t size) +{ + MPASS(vnet_init_var != 0); + MPASS(VNET_START <= (uintptr_t)start && + (uintptr_t)start + size <= VNET_STOP); + memcpy((void *)(vnet_init_var + ((uintptr_t)start - VNET_START)), + start, size); +} + +/* + * Restore the 'master' copies of virtualized global variables to theirs + * initial values. + */ +void +vnet_restore_init(void *start, size_t size) +{ + MPASS(vnet_init_var != 0); + MPASS(VNET_START <= (uintptr_t)start && + (uintptr_t)start + size <= VNET_STOP); + memcpy(start, + (void *)(vnet_init_var + ((uintptr_t)start - VNET_START)), size); +} + /* * Support for special SYSINIT handlers registered via VNET_SYSINIT() * and VNET_SYSUNINIT(). 
diff --git a/sys/net/vnet.h b/sys/net/vnet.h index 1d37fe85eec3..5485889ceaa7 100644 --- a/sys/net/vnet.h +++ b/sys/net/vnet.h @@ -311,6 +311,12 @@ void *vnet_data_alloc(int size); void vnet_data_copy(void *start, int size); void vnet_data_free(void *start_arg, int size); +/* + * Interfaces to manipulate the initial values of virtualized global variables. + */ +void vnet_save_init(void *, size_t); +void vnet_restore_init(void *, size_t); + /* * Virtual sysinit mechanism, allowing network stack components to declare * startup and shutdown methods to be run when virtual network stack diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index 47024ecf87a9..c0d9811dd1b9 100644 --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -326,4 +326,9 @@ struct ifaddr; typedef void (*rt_addrmsg_fn)(void *, struct ifaddr *, int); EVENTHANDLER_DECLARE(rt_addrmsg, rt_addrmsg_fn); +/* Kernel environment variable change event */ +typedef void (*env_change_fn)(void *, const char *); +EVENTHANDLER_DECLARE(setenv, env_change_fn); +EVENTHANDLER_DECLARE(unsetenv, env_change_fn); + #endif /* _SYS_EVENTHANDLER_H_ */