Use fences for kernel tsc reads.
Konstantin Belousov
kostikbel at gmail.com
Sat Jul 28 16:02:08 UTC 2012
Hi,
This was discussed on somewhat unwieldly thread on svn-src@ as a followup
to the commit r238755 which uncovered the problem in the first place.
Below is the commit candidate. It changes the fix in r238755 to use CPUID
instead of rmb(), since rmb() translates to locked operation on i386,
and experimentation shown that Nehalem's RDTSC can pass LOCK.
Also, I explicitely use LFENCE and MFENCE instead using rmb() and mb(),
for the same reason that on i386 implementation of *mb() is a locked
operation instead of fence.
Jim, could you please, recheck that on your machine there is no regression
in the TSC synchronization test ?
Handling of usermode will follow later.
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 94d4133..881fcd2 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -290,6 +290,13 @@ popcntq(u_long mask)
}
static __inline void
+lfence(void)
+{
+
+ __asm __volatile("lfence" : : : "memory");
+}
+
+static __inline void
mfence(void)
{
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index 62d268d..7cd3663 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -155,6 +155,13 @@ cpu_mwait(u_long extensions, u_int hints)
}
static __inline void
+lfence(void)
+{
+
+ __asm __volatile("lfence" : : : "memory");
+}
+
+static __inline void
mfence(void)
{
diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c
index c253a96..101cbb3 100644
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -83,6 +83,10 @@ static void tsc_freq_changing(void *arg, const struct cf_level *level,
int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static unsigned tsc_get_timecount_low(struct timecounter *tc);
+static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
static struct timecounter tsc_timecounter = {
@@ -262,6 +266,10 @@ probe_tsc_freq(void)
(vm_guest == VM_GUEST_NO &&
CPUID_TO_FAMILY(cpu_id) >= 0x10))
tsc_is_invariant = 1;
+ if (cpu_feature & CPUID_SSE2) {
+ tsc_timecounter.tc_get_timecount =
+ tsc_get_timecount_mfence;
+ }
break;
case CPU_VENDOR_INTEL:
if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
@@ -271,6 +279,10 @@ probe_tsc_freq(void)
(CPUID_TO_FAMILY(cpu_id) == 0xf &&
CPUID_TO_MODEL(cpu_id) >= 0x3))))
tsc_is_invariant = 1;
+ if (cpu_feature & CPUID_SSE2) {
+ tsc_timecounter.tc_get_timecount =
+ tsc_get_timecount_lfence;
+ }
break;
case CPU_VENDOR_CENTAUR:
if (vm_guest == VM_GUEST_NO &&
@@ -278,6 +290,10 @@ probe_tsc_freq(void)
CPUID_TO_MODEL(cpu_id) >= 0xf &&
(rdmsr(0x1203) & 0x100000000ULL) == 0)
tsc_is_invariant = 1;
+ if (cpu_feature & CPUID_SSE2) {
+ tsc_timecounter.tc_get_timecount =
+ tsc_get_timecount_lfence;
+ }
break;
}
@@ -328,15 +344,26 @@ init_TSC(void)
#ifdef SMP
-/* rmb is required here because rdtsc is not a serializing instruction. */
+/*
+ * RDTSC is not a serializing instruction, so we need to drain
+ * instruction stream before executing it. It could be fixed by use of
+ * RDTSCP, except the instruction is not available everywhere.
+ *
+ * Use CPUID for draining. The timecounters use MFENCE for AMD CPUs,
+ * and LFENCE for others (Intel and VIA) when SSE2 is present, and
+ * nothing on older machines which also do not issue RDTSC
+ * prematurely. There, testing for SSE2 and vendor is too cumbersome,
+ * and we learn about TSC presence from CPUID.
+ */
#define TSC_READ(x) \
static void \
tsc_read_##x(void *arg) \
{ \
uint32_t *tsc = arg; \
+ u_int p[4]; \
u_int cpu = PCPU_GET(cpuid); \
\
- rmb(); \
+ do_cpuid(0, p); \
tsc[cpu * 3 + x] = rdtsc32(); \
}
TSC_READ(0)
@@ -487,7 +514,16 @@ init:
for (shift = 0; shift < 31 && (tsc_freq >> shift) > max_freq; shift++)
;
if (shift > 0) {
- tsc_timecounter.tc_get_timecount = tsc_get_timecount_low;
+ if (cpu_feature & CPUID_SSE2) {
+ if (cpu_vendor_id == CPU_VENDOR_AMD) {
+ tsc_timecounter.tc_get_timecount =
+ tsc_get_timecount_low_mfence;
+ } else {
+ tsc_timecounter.tc_get_timecount =
+ tsc_get_timecount_low_lfence;
+ }
+ } else
+ tsc_timecounter.tc_get_timecount = tsc_get_timecount_low;
tsc_timecounter.tc_name = "TSC-low";
if (bootverbose)
printf("TSC timecounter discards lower %d bit(s)\n",
@@ -592,23 +628,55 @@ sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
-static u_int
+static inline u_int
tsc_get_timecount(struct timecounter *tc __unused)
{
return (rdtsc32());
}
-static u_int
+static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
uint32_t rv;
__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
- : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
+ : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
return (rv);
}
+static inline u_int
+tsc_get_timecount_lfence(struct timecounter *tc __unused)
+{
+
+ lfence();
+ return (rdtsc32());
+}
+
+static inline u_int
+tsc_get_timecount_low_lfence(struct timecounter *tc)
+{
+
+ lfence();
+ return (tsc_get_timecount_low(tc));
+}
+
+static inline u_int
+tsc_get_timecount_mfence(struct timecounter *tc __unused)
+{
+
+ mfence();
+ return (rdtsc32());
+}
+
+static inline u_int
+tsc_get_timecount_low_mfence(struct timecounter *tc)
+{
+
+ mfence();
+ return (tsc_get_timecount_low(tc));
+}
+
uint32_t
cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-amd64/attachments/20120728/a50d0ca1/attachment.pgp
More information about the freebsd-amd64
mailing list