git: d4b2d3035a23 - main - pvclock: Add vDSO support
Konstantin Belousov
kib at FreeBSD.org
Sat Aug 14 12:58:16 UTC 2021
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=d4b2d3035a23d5dc468d41151487a8299bf45cdc
commit d4b2d3035a23d5dc468d41151487a8299bf45cdc
Author: Adam Fenn <adam at fenn.io>
AuthorDate: 2021-08-07 20:10:04 +0000
Commit: Konstantin Belousov <kib at FreeBSD.org>
CommitDate: 2021-08-14 12:57:54 +0000
pvclock: Add vDSO support
Add vDSO support for timekeeping devices that support the KVM/XEN
paravirtual clock API.
Also expose, in the userspace-accessible '<machine/pvclock.h>', the
definitions that 'libc' will need in order to support
'VDSO_TH_ALGO_X86_PVCLK'.
Sponsored by: Juniper Networks, Inc.
Sponsored by: Klara, Inc.
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D31418
---
sys/dev/acpica/acpi_hpet.c | 4 +
sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c | 2 +
sys/x86/include/pvclock.h | 62 +++++++++++
sys/x86/include/vdso.h | 5 +-
sys/x86/x86/pvclock.c | 165 +++++++++++++++++++---------
sys/x86/x86/tsc.c | 4 +
6 files changed, 188 insertions(+), 54 deletions(-)
diff --git a/sys/dev/acpica/acpi_hpet.c b/sys/dev/acpica/acpi_hpet.c
index 9f92521437fd..0f0a16f336f2 100644
--- a/sys/dev/acpica/acpi_hpet.c
+++ b/sys/dev/acpica/acpi_hpet.c
@@ -156,6 +156,8 @@ hpet_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
vdso_th->th_algo = VDSO_TH_ALGO_X86_HPET;
vdso_th->th_x86_shift = 0;
vdso_th->th_x86_hpet_idx = device_get_unit(sc->dev);
+ vdso_th->th_x86_pvc_last_systime = 0;
+ vdso_th->th_x86_pvc_stable_mask = 0;
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
return (sc->mmap_allow != 0);
}
@@ -171,6 +173,8 @@ hpet_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
vdso_th32->th_algo = VDSO_TH_ALGO_X86_HPET;
vdso_th32->th_x86_shift = 0;
vdso_th32->th_x86_hpet_idx = device_get_unit(sc->dev);
+ vdso_th32->th_x86_pvc_last_systime = 0;
+ vdso_th32->th_x86_pvc_stable_mask = 0;
bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
return (sc->mmap_allow != 0);
}
diff --git a/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c b/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c
index c08098138805..11d549dc18d2 100644
--- a/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c
+++ b/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c
@@ -128,6 +128,8 @@ hyperv_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
vdso_th->th_algo = VDSO_TH_ALGO_X86_HVTSC;
vdso_th->th_x86_shift = 0;
vdso_th->th_x86_hpet_idx = 0;
+ vdso_th->th_x86_pvc_last_systime = 0;
+ vdso_th->th_x86_pvc_stable_mask = 0;
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
return (1);
}
diff --git a/sys/x86/include/pvclock.h b/sys/x86/include/pvclock.h
index 399017039dd0..023acdb80d9c 100644
--- a/sys/x86/include/pvclock.h
+++ b/sys/x86/include/pvclock.h
@@ -30,7 +30,12 @@
#define X86_PVCLOCK
#include <sys/types.h>
+
+#ifdef _KERNEL
#include <sys/timetc.h>
+#endif /* _KERNEL */
+
+#define PVCLOCK_CDEVNAME "pvclock"
struct pvclock_vcpu_time_info {
uint32_t version;
@@ -46,6 +51,59 @@ struct pvclock_vcpu_time_info {
#define PVCLOCK_FLAG_TSC_STABLE 0x01
#define PVCLOCK_FLAG_GUEST_PASUED 0x02
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline uint64_t
+pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
+{
+ uint64_t product;
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+#if defined(__i386__)
+ {
+ uint32_t tmp1, tmp2;
+
+ /**
+ * For i386, the formula looks like:
+ *
+ * lower = (mul_frac * (delta & UINT_MAX)) >> 32
+ * upper = mul_frac * (delta >> 32)
+ * product = lower + upper
+ */
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
+ "2" (mul_frac) );
+ }
+#elif defined(__amd64__)
+ {
+ unsigned long tmp;
+
+ __asm__ (
+ "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
+ : [lo]"=a" (product), [hi]"=d" (tmp)
+ : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
+ }
+#else
+#error "pvclock: unsupported x86 architecture?"
+#endif
+ return (product);
+}
+
+#ifdef _KERNEL
+
typedef struct pvclock_wall_clock *pvclock_get_wallclock_t(void *arg);
struct pvclock_wall_clock {
@@ -62,7 +120,9 @@ struct pvclock {
bool stable_flag_supported;
/* Private; initialized by the 'pvclock' API: */
+ bool vdso_force_unstable;
struct timecounter tc;
+ struct cdev *cdev;
};
/*
@@ -81,4 +141,6 @@ void pvclock_init(struct pvclock *pvc, device_t dev,
void pvclock_gettime(struct pvclock *pvc, struct timespec *ts);
int pvclock_destroy(struct pvclock *pvc);
+#endif /* _KERNEL */
+
#endif
diff --git a/sys/x86/include/vdso.h b/sys/x86/include/vdso.h
index 97972c660dde..ace63cbe9f62 100644
--- a/sys/x86/include/vdso.h
+++ b/sys/x86/include/vdso.h
@@ -37,11 +37,14 @@
#define VDSO_TIMEHANDS_MD \
uint32_t th_x86_shift; \
uint32_t th_x86_hpet_idx; \
- uint32_t th_res[6];
+ uint64_t th_x86_pvc_last_systime;\
+ uint8_t th_x86_pvc_stable_mask; \
+ uint8_t th_res[15];
#define VDSO_TH_ALGO_X86_TSC VDSO_TH_ALGO_1
#define VDSO_TH_ALGO_X86_HPET VDSO_TH_ALGO_2
#define VDSO_TH_ALGO_X86_HVTSC VDSO_TH_ALGO_3 /* Hyper-V ref. TSC */
+#define VDSO_TH_ALGO_X86_PVCLK VDSO_TH_ALGO_4 /* KVM/XEN paravirtual clock */
#ifdef _KERNEL
#ifdef COMPAT_FREEBSD32
diff --git a/sys/x86/x86/pvclock.c b/sys/x86/x86/pvclock.c
index e0ad65d906b8..cc2377bdbcf0 100644
--- a/sys/x86/x86/pvclock.c
+++ b/sys/x86/x86/pvclock.c
@@ -31,11 +31,22 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/bus.h>
#include <sys/clock.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
#include <sys/limits.h>
+#include <sys/mman.h>
#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/vdso.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
#include <machine/atomic.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pvclock.h>
@@ -54,6 +65,22 @@ static void pvclock_read_time_info(
static void pvclock_read_wall_clock(struct pvclock_wall_clock *wc,
struct timespec *ts);
static u_int pvclock_tc_get_timecount(struct timecounter *tc);
+static uint32_t pvclock_tc_vdso_timehands(
+ struct vdso_timehands *vdso_th, struct timecounter *tc);
+#ifdef COMPAT_FREEBSD32
+static uint32_t pvclock_tc_vdso_timehands32(
+ struct vdso_timehands32 *vdso_th, struct timecounter *tc);
+#endif
+
+static d_open_t pvclock_cdev_open;
+static d_mmap_t pvclock_cdev_mmap;
+
+static struct cdevsw pvclock_cdev_cdevsw = {
+ .d_version = D_VERSION,
+ .d_name = PVCLOCK_CDEVNAME,
+ .d_open = pvclock_cdev_open,
+ .d_mmap = pvclock_cdev_mmap,
+};
void
pvclock_resume(void)
@@ -74,57 +101,6 @@ pvclock_tsc_freq(struct pvclock_vcpu_time_info *ti)
return (freq);
}
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline uint64_t
-pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
-{
- uint64_t product;
-
- if (shift < 0)
- delta >>= -shift;
- else
- delta <<= shift;
-#if defined(__i386__)
- {
- uint32_t tmp1, tmp2;
-
- /**
- * For i386, the formula looks like:
- *
- * lower = (mul_frac * (delta & UINT_MAX)) >> 32
- * upper = mul_frac * (delta >> 32)
- * product = lower + upper
- */
- __asm__ (
- "mul %5 ; "
- "mov %4,%%eax ; "
- "mov %%edx,%4 ; "
- "mul %5 ; "
- "xor %5,%5 ; "
- "add %4,%%eax ; "
- "adc %5,%%edx ; "
- : "=A" (product), "=r" (tmp1), "=r" (tmp2)
- : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
- "2" (mul_frac) );
- }
-#elif defined(__amd64__)
- {
- unsigned long tmp;
-
- __asm__ (
- "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
- : [lo]"=a" (product), [hi]"=d" (tmp)
- : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
- }
-#else
-#error "pvclock: unsupported x86 architecture?"
-#endif
- return (product);
-}
-
static void
pvclock_read_time_info(struct pvclock_vcpu_time_info *ti,
uint64_t *ns, uint8_t *flags)
@@ -213,6 +189,27 @@ pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts)
pvclock_read_wall_clock(wc, ts);
}
+static int
+pvclock_cdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ if (oflags & FWRITE)
+ return (EPERM);
+ return (0);
+}
+
+static int
+pvclock_cdev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int nprot, vm_memattr_t *memattr)
+{
+ if (offset >= mp_ncpus * sizeof(struct pvclock_vcpu_time_info))
+ return (EINVAL);
+ if (PROT_EXTRACT(nprot) != PROT_READ)
+ return (EACCES);
+ *paddr = vtophys((uintptr_t)dev->si_drv1 + offset);
+ *memattr = VM_MEMATTR_DEFAULT;
+ return (0);
+}
+
static u_int
pvclock_tc_get_timecount(struct timecounter *tc)
{
@@ -221,6 +218,42 @@ pvclock_tc_get_timecount(struct timecounter *tc)
return (pvclock_getsystime(pvc) & UINT_MAX);
}
+static uint32_t
+pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th,
+ struct timecounter *tc)
+{
+ struct pvclock *pvc = tc->tc_priv;
+
+ vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
+ vdso_th->th_x86_shift = 0;
+ vdso_th->th_x86_hpet_idx = 0;
+ vdso_th->th_x86_pvc_last_systime =
+ atomic_load_acq_64(&pvclock_last_systime);
+ vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
+ pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
+ bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
+ return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
+}
+
+#ifdef COMPAT_FREEBSD32
+static uint32_t
+pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th,
+ struct timecounter *tc)
+{
+ struct pvclock *pvc = tc->tc_priv;
+
+ vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
+ vdso_th->th_x86_shift = 0;
+ vdso_th->th_x86_hpet_idx = 0;
+ vdso_th->th_x86_pvc_last_systime =
+ atomic_load_acq_64(&pvclock_last_systime);
+ vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
+ pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
+ bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
+ return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
+}
+#endif
+
void
pvclock_gettime(struct pvclock *pvc, struct timespec *ts)
{
@@ -238,9 +271,19 @@ void
pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
int tc_quality, u_int tc_flags)
{
+ struct make_dev_args mda;
+ int err;
+
KASSERT(((uintptr_t)pvc->timeinfos & PAGE_MASK) == 0,
("Specified time info page(s) address is not page-aligned."));
+ /* Set up vDSO stable-flag suppression test facility: */
+ pvc->vdso_force_unstable = false;
+ SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
+ "vdso_force_unstable", CTLFLAG_RW, &pvc->vdso_force_unstable, 0,
+ "Forcibly deassert stable flag in vDSO codepath");
+
/* Set up timecounter and timecounter-supporting members: */
pvc->tc.tc_get_timecount = pvclock_tc_get_timecount;
pvc->tc.tc_poll_pps = NULL;
@@ -250,11 +293,27 @@ pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
pvc->tc.tc_quality = tc_quality;
pvc->tc.tc_flags = tc_flags;
pvc->tc.tc_priv = pvc;
- pvc->tc.tc_fill_vdso_timehands = NULL;
+ pvc->tc.tc_fill_vdso_timehands = pvclock_tc_vdso_timehands;
#ifdef COMPAT_FREEBSD32
- pvc->tc.tc_fill_vdso_timehands32 = NULL;
+ pvc->tc.tc_fill_vdso_timehands32 = pvclock_tc_vdso_timehands32;
#endif
+ /* Set up cdev for userspace mmapping of vCPU 0 time info page: */
+ make_dev_args_init(&mda);
+ mda.mda_devsw = &pvclock_cdev_cdevsw;
+ mda.mda_uid = UID_ROOT;
+ mda.mda_gid = GID_WHEEL;
+ mda.mda_mode = 0444;
+ mda.mda_si_drv1 = pvc->timeinfos;
+ err = make_dev_s(&mda, &pvc->cdev, PVCLOCK_CDEVNAME);
+ if (err != 0) {
+ device_printf(dev, "Could not create /dev/%s, error %d. Fast "
+ "time of day will be unavailable for this timecounter.\n",
+ PVCLOCK_CDEVNAME, err);
+ KASSERT(pvc->cdev == NULL,
+ ("Failed make_dev_s() unexpectedly inited cdev."));
+ }
+
/* Register timecounter: */
tc_init(&pvc->tc);
diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c
index 5ffbb64229e9..0ebcea895cd3 100644
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -870,6 +870,8 @@ x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
vdso_th->th_x86_hpet_idx = 0xffffffff;
+ vdso_th->th_x86_pvc_last_systime = 0;
+ vdso_th->th_x86_pvc_stable_mask = 0;
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
return (1);
}
@@ -883,6 +885,8 @@ x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
vdso_th32->th_x86_hpet_idx = 0xffffffff;
+ vdso_th32->th_x86_pvc_last_systime = 0;
+ vdso_th32->th_x86_pvc_stable_mask = 0;
bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
return (1);
}
More information about the dev-commits-src-all
mailing list