git: 2bed14192cec - main - pvclock: Export a vDSO page even without rdtscp available

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Fri, 03 Feb 2023 16:50:32 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=2bed14192cec1abcdc4e02ca7b18dafd868095ef

commit 2bed14192cec1abcdc4e02ca7b18dafd868095ef
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2023-02-03 15:54:23 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-02-03 16:48:25 +0000

    pvclock: Export a vDSO page even without rdtscp available
    
    When the cycle counter is "stable", i.e., synchronized across vCPUs by
    the hypervisor, userspace can use a serialized rdtsc instead of relying
    on rdtscp, just like the kernel timecounter does.  This can be useful
    for performance in guests where the hypervisor hides rdtscp for some
    reason.
    
    To avoid breaking compatibility with older userspace which expects
    rdtscp to be usable when pvclock exports timekeeping info, hide this
    feature behind a per-device sysctl, vdso_enable_without_rdtscp, which
    is disabled by default.
    
    Reviewed by:    kib
    Tested by:      Shrikanth R Kamath <kshrikanth@juniper.net>
    MFC after:      2 weeks
    Sponsored by:   Klara, Inc.
    Sponsored by:   Juniper Networks, Inc.
    Differential Revision:  https://reviews.freebsd.org/D38342
---
 sys/x86/include/pvclock.h |  1 +
 sys/x86/x86/pvclock.c     | 26 ++++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/sys/x86/include/pvclock.h b/sys/x86/include/pvclock.h
index 023acdb80d9c..1306f11fc960 100644
--- a/sys/x86/include/pvclock.h
+++ b/sys/x86/include/pvclock.h
@@ -121,6 +121,7 @@ struct pvclock {
 
 	/* Private; initialized by the 'pvclock' API: */
 	bool				 vdso_force_unstable;
+	bool				 vdso_enable_without_rdtscp;
 	struct timecounter		 tc;
 	struct cdev			*cdev;
 };
diff --git a/sys/x86/x86/pvclock.c b/sys/x86/x86/pvclock.c
index cc2377bdbcf0..3da3373bb2ee 100644
--- a/sys/x86/x86/pvclock.c
+++ b/sys/x86/x86/pvclock.c
@@ -224,6 +224,9 @@ pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th,
 {
 	struct pvclock *pvc = tc->tc_priv;
 
+	if (pvc->cdev == NULL)
+		return (0);
+
 	vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
 	vdso_th->th_x86_shift = 0;
 	vdso_th->th_x86_hpet_idx = 0;
@@ -232,7 +235,9 @@ pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th,
 	vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
 	    pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
 	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
-	return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
+	return ((amd_feature & AMDID_RDTSCP) != 0 ||
+	    ((vdso_th->th_x86_pvc_stable_mask & PVCLOCK_FLAG_TSC_STABLE) != 0 &&
+	    pvc->vdso_enable_without_rdtscp));
 }
 
 #ifdef COMPAT_FREEBSD32
@@ -242,6 +247,9 @@ pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th,
 {
 	struct pvclock *pvc = tc->tc_priv;
 
+	if (pvc->cdev == NULL)
+		return (0);
+
 	vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
 	vdso_th->th_x86_shift = 0;
 	vdso_th->th_x86_hpet_idx = 0;
@@ -250,7 +258,9 @@ pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th,
 	vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
 	    pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
 	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
-	return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
+	return ((amd_feature & AMDID_RDTSCP) != 0 ||
+	    ((vdso_th->th_x86_pvc_stable_mask & PVCLOCK_FLAG_TSC_STABLE) != 0 &&
+	    pvc->vdso_enable_without_rdtscp));
 }
 #endif
 
@@ -284,6 +294,18 @@ pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
 	    "vdso_force_unstable", CTLFLAG_RW, &pvc->vdso_force_unstable, 0,
 	    "Forcibly deassert stable flag in vDSO codepath");
 
+	/*
+	 * Make it possible to use the vDSO page even when the hypervisor does
+	 * not support the rdtscp instruction.  This is disabled by default for
+	 * compatibility with old libc.
+	 */
+	pvc->vdso_enable_without_rdtscp = false;
+	SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
+	    "vdso_enable_without_rdtscp", CTLFLAG_RWTUN,
+	    &pvc->vdso_enable_without_rdtscp, 0,
+	    "Allow the use of a vDSO when rdtscp is not available");
+
 	/* Set up timecounter and timecounter-supporting members: */
 	pvc->tc.tc_get_timecount = pvclock_tc_get_timecount;
 	pvc->tc.tc_poll_pps = NULL;