git: 2bed14192cec - main - pvclock: Export a vDSO page even without rdtscp available
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 03 Feb 2023 16:50:32 UTC
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=2bed14192cec1abcdc4e02ca7b18dafd868095ef commit 2bed14192cec1abcdc4e02ca7b18dafd868095ef Author: Mark Johnston <markj@FreeBSD.org> AuthorDate: 2023-02-03 15:54:23 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2023-02-03 16:48:25 +0000 pvclock: Export a vDSO page even without rdtscp available When the cycle counter is "stable", i.e., synchronized across vCPUs by the hypervisor, userspace can use a serialized rdtsc instead of relying on rdtscp, just like the kernel timecounter does. This can be useful for performance in guests where the hypervisor hides rdtscp for some reason. To avoid breaking compatibility with older userspace which expects rdtscp to be usable when pvclock exports timekeeping info, hide this feature behind a sysctl. Reviewed by: kib Tested by: Shrikanth R Kamath <kshrikanth@juniper.net> MFC after: 2 weeks Sponsored by: Klara, Inc. Sponsored by: Juniper Networks, Inc. Differential Revision: https://reviews.freebsd.org/D38342 --- sys/x86/include/pvclock.h | 1 + sys/x86/x86/pvclock.c | 26 ++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sys/x86/include/pvclock.h b/sys/x86/include/pvclock.h index 023acdb80d9c..1306f11fc960 100644 --- a/sys/x86/include/pvclock.h +++ b/sys/x86/include/pvclock.h @@ -121,6 +121,7 @@ struct pvclock { /* Private; initialized by the 'pvclock' API: */ bool vdso_force_unstable; + bool vdso_enable_without_rdtscp; struct timecounter tc; struct cdev *cdev; }; diff --git a/sys/x86/x86/pvclock.c b/sys/x86/x86/pvclock.c index cc2377bdbcf0..3da3373bb2ee 100644 --- a/sys/x86/x86/pvclock.c +++ b/sys/x86/x86/pvclock.c @@ -224,6 +224,9 @@ pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th, { struct pvclock *pvc = tc->tc_priv; + if (pvc->cdev == NULL) + return (0); + vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK; vdso_th->th_x86_shift = 0; vdso_th->th_x86_hpet_idx = 0; @@ -232,7 +235,9 @@ pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th, vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable && pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0; bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); - return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP); + return ((amd_feature & AMDID_RDTSCP) != 0 || + ((vdso_th->th_x86_pvc_stable_mask & PVCLOCK_FLAG_TSC_STABLE) != 0 && + pvc->vdso_enable_without_rdtscp)); } #ifdef COMPAT_FREEBSD32 @@ -242,6 +247,9 @@ pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th, { struct pvclock *pvc = tc->tc_priv; + if (pvc->cdev == NULL) + return (0); + vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK; vdso_th->th_x86_shift = 0; vdso_th->th_x86_hpet_idx = 0; @@ -250,7 +258,9 @@ pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th, vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable && pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0; bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); - return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP); + return ((amd_feature & AMDID_RDTSCP) != 0 || + ((vdso_th->th_x86_pvc_stable_mask & PVCLOCK_FLAG_TSC_STABLE) != 0 && + pvc->vdso_enable_without_rdtscp)); } #endif @@ -284,6 +294,18 @@ pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name, "vdso_force_unstable", CTLFLAG_RW, &pvc->vdso_force_unstable, 0, "Forcibly deassert stable flag in vDSO codepath"); + /* + * Make it possible to use the vDSO page even when the hypervisor does + * not support the rdtscp instruction. This is disabled by default for + * compatibility with old libc. + */ + pvc->vdso_enable_without_rdtscp = false; + SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, + "vdso_enable_without_rdtscp", CTLFLAG_RWTUN, + &pvc->vdso_enable_without_rdtscp, 0, + "Allow the use of a vDSO when rdtscp is not available"); + /* Set up timecounter and timecounter-supporting members: */ pvc->tc.tc_get_timecount = pvclock_tc_get_timecount; pvc->tc.tc_poll_pps = NULL;