svn commit: r219679 - head/sys/i386/include
Jung-uk Kim
jkim at FreeBSD.org
Fri Mar 18 18:40:00 UTC 2011
On Friday 18 March 2011 01:19 am, Bruce Evans wrote:
> On Thu, 17 Mar 2011, Jung-uk Kim wrote:
> > On Thursday 17 March 2011 03:57 pm, Peter Jeremy wrote:
> >> On 2011-Mar-16 16:34:04 -0400, Jung-uk Kim <jkim at FreeBSD.org>
wrote:
> >>> On Wednesday 16 March 2011 01:45 pm, Roman Divacky wrote:
> >>>> if we drop i486 I think it makes sense to require something
> >>>> that has at least SSE2, thus we can have the same expectations
> >>>> as on amd64.
> >>
> >> I think it's still a bit early for that - especially the SSE2
> >> requirement.
> >>
> >>> This is a proof-of-concept patch for sys/x86/isa/clock.c:
> >>>
> >>> http://people.freebsd.org/~jkim/clock.diff
> >>>
> >>> You can see the complexity, just because I wanted to load a
> >>> 64-bit value atomically... :-(
> >>
> >> An alternative approach is to have _fetch_frequency() be
> >> uint64_t (*_fetch_frequency)(uint64_t *);
> >> if both i386 and I486 are defined (otherwise it's just the
> >> #define (*(p))), and initialise it to either
> >> atomic_fetch_quad_i386 or atomic_fetch_quad_i586 as part of the
> >> CPU detection process.
> >> This is the way bcopy() is/was handled on Pentium.
> >>
> >> Another approach would be to always have the cmpxchg8b instruction
> >> (followed by a suitably large NOP) inlined in the code,
> >> and if it traps, patch the code to call a function that emulates
> >> it.
> >
> > I think the former makes more sense for atomic read/write because
> > we don't need complete cmpxchg8b support, just something like
> > movq support.
>
> Both require a function call. With a function call, patching
> becomes much easier since there is only 1 place to patch, so
> patching is almost as easy as changing a function pointer (might
> need an instruction queue flush and/or prevention of the function
> being called before or while it is being patched).
>
> Patching the code also makes it easier to null out the lock prefix
> in the !SMP case when it is presumably not needed. The function
> call to a function without a lock prefix will then be faster than
> inline code with a lock prefix. With a function pointer, you start
> getting combinatorial explosion in the number of separate functions
> needed (1 without cmpxchg8b or a lock prefix (for i486), 1 with
> cmpxchg8b without a lock prefix (for !SMP i586+), and 1 with both
> (for SMP i586+)).
I already implemented the function pointer thing last night. You can
see the current work-in-progress patch here:
http://people.freebsd.org/~jkim/tsc_cleanup.diff
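In short, the dispatch boils down to a pair of function pointers that
start out pointing at the i386 versions and are switched once CPUID
reports CMPXCHG8B support; from the i386/machdep.c part of the patch:

uint64_t (*atomic_load_64)(uint64_t *) = atomic_load_64_i386;
void (*atomic_store_64)(uint64_t *, uint64_t) = atomic_store_64_i386;

static void
cpu_probe_cx8(void)
{

	if ((cpu_feature & CPUID_CX8) != 0) {
		atomic_load_64 = atomic_load_64_i586;
		atomic_store_64 = atomic_store_64_i586;
	}
}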
It is also attached below.  I haven't noticed any problems so far,
but I am sure you will find some. ;-)
Please note that the patch includes the get_cyclecount() to
cpu_ticks() conversion, to give you a complete picture.
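For reference, the i586 load/store emulation works because CMPXCHG8B
always leaves the old memory value in EDX:EAX, whether or not the
compare succeeds.  Roughly, in C pseudocode (cmpxchg8b_sketch is a
made-up name, for illustration only):

/*
 * What CMPXCHG8B does, as one atomic operation.
 */
static uint64_t
cmpxchg8b_sketch(uint64_t *p, uint64_t expect, uint64_t new)
{
	uint64_t old;

	old = *p;		/* old value always ends up in EDX:EAX */
	if (old == expect)
		*p = new;	/* ECX:EBX is stored only on a match */
	return (old);
}

Setting expect == new gives a pure atomic load (memory is never
modified either way), and retrying until the compare succeeds gives an
atomic store; that is all atomic_load_64_i586() and
atomic_store_64_i586() in the patch do.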
Jung-uk Kim
-------------- next part --------------
Index: sys/kern/kern_ktr.c
===================================================================
--- sys/kern/kern_ktr.c (revision 219741)
+++ sys/kern/kern_ktr.c (working copy)
@@ -73,7 +73,7 @@ __FBSDID("$FreeBSD$");
#endif
#ifndef KTR_TIME
-#define KTR_TIME get_cyclecount()
+#define KTR_TIME cpu_ticks()
#endif
#ifndef KTR_CPU
Index: sys/kern/init_main.c
===================================================================
--- sys/kern/init_main.c (revision 219741)
+++ sys/kern/init_main.c (working copy)
@@ -560,7 +560,7 @@ SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST,
static void
proc0_post(void *dummy __unused)
{
- struct timespec ts;
+ struct bintime bt;
struct proc *p;
struct rusage ru;
struct thread *td;
@@ -590,8 +590,8 @@ proc0_post(void *dummy __unused)
/*
* Give the ``random'' number generator a thump.
*/
- nanotime(&ts);
- srandom(ts.tv_sec ^ ts.tv_nsec);
+ bintime(&bt);
+ srandom(bt.sec ^ bt.frac);
}
SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL);
@@ -601,10 +601,10 @@ random_init(void *dummy __unused)
/*
* After CPU has been started we have some randomness on most
- * platforms via get_cyclecount(). For platforms that don't
- * we will reseed random(9) in proc0_post() as well.
+ * platforms via cpu_ticks(). For platforms that don't we will
+ * reseed random(9) in proc0_post() as well.
*/
- srandom(get_cyclecount());
+ srandom(cpu_ticks());
}
SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL);
Index: sys/netinet/sctp_os_bsd.h
===================================================================
--- sys/netinet/sctp_os_bsd.h (revision 219741)
+++ sys/netinet/sctp_os_bsd.h (working copy)
@@ -129,7 +129,7 @@ MALLOC_DECLARE(SCTP_M_MCORE);
#if defined(SCTP_LOCAL_TRACE_BUF)
-#define SCTP_GET_CYCLECOUNT get_cyclecount()
+#define SCTP_GET_CYCLECOUNT cpu_ticks()
#define SCTP_CTR6 sctp_log_trace
#else
Index: sys/dev/acpica/acpi_cpu.c
===================================================================
--- sys/dev/acpica/acpi_cpu.c (revision 219741)
+++ sys/dev/acpica/acpi_cpu.c (working copy)
@@ -516,7 +516,7 @@ acpi_cpu_read_ivar(device_t dev, device_t child, i
#if defined(__amd64__) || defined(__i386__)
case CPU_IVAR_NOMINAL_MHZ:
if (tsc_is_invariant) {
- *result = (uintptr_t)(tsc_freq / 1000000);
+ *result = (uintptr_t)(GET_TSC_FREQ() / 1000000);
break;
}
/* FALLTHROUGH */
Index: sys/dev/de/if_devar.h
===================================================================
--- sys/dev/de/if_devar.h (revision 219741)
+++ sys/dev/de/if_devar.h (working copy)
@@ -903,7 +903,7 @@ typedef u_long tulip_cycle_t;
static __inline tulip_cycle_t
TULIP_PERFREAD(void)
{
- return (get_cyclecount());
+ return (cpu_ticks());
}
#define TULIP_PERFDIFF(s, f) ((f) - (s))
Index: sys/dev/random/randomdev_soft.c
===================================================================
--- sys/dev/random/randomdev_soft.c (revision 219741)
+++ sys/dev/random/randomdev_soft.c (working copy)
@@ -353,8 +353,8 @@ random_yarrow_write(void *buf, int count)
chunk = HARVESTSIZE;
if (i + chunk >= count)
chunk = (u_int)(count - i);
- random_harvest_internal(get_cyclecount(), (char *)buf + i,
- chunk, 0, 0, RANDOM_WRITE);
+ random_harvest_internal(cpu_ticks(), (char *)buf + i, chunk,
+ 0, 0, RANDOM_WRITE);
}
}
Index: sys/dev/random/harvest.c
===================================================================
--- sys/dev/random/harvest.c (revision 219741)
+++ sys/dev/random/harvest.c (working copy)
@@ -78,17 +78,16 @@ random_yarrow_deinit_harvester(void)
* Implemented as in indirect call to allow non-inclusion of
* the entropy device.
*
- * XXXRW: get_cyclecount() is cheap on most modern hardware, where cycle
- * counters are built in, but on older hardware it will do a real time clock
- * read which can be quite expensive.
+ * XXXRW: cpu_ticks() is cheap on most modern hardware, where cycle counters
+ * are built in, but on older hardware it will do a real time clock read
+ * which can be quite expensive.
*/
void
random_harvest(void *entropy, u_int count, u_int bits, u_int frac,
enum esource origin)
{
if (reap_func)
- (*reap_func)(get_cyclecount(), entropy, count, bits, frac,
- origin);
+ (*reap_func)(cpu_ticks(), entropy, count, bits, frac, origin);
}
/* Userland-visible version of read_random */
Index: sys/compat/linprocfs/linprocfs.c
===================================================================
--- sys/compat/linprocfs/linprocfs.c (revision 219741)
+++ sys/compat/linprocfs/linprocfs.c (working copy)
@@ -221,6 +221,7 @@ linprocfs_docpuinfo(PFS_FILL_ARGS)
{
int hw_model[2];
char model[128];
+ uint64_t freq;
size_t size;
int class, fqmhz, fqkhz;
int i;
@@ -303,9 +304,10 @@ linprocfs_docpuinfo(PFS_FILL_ARGS)
if (cpu_feature & (1 << i))
sbuf_printf(sb, " %s", flags[i]);
sbuf_cat(sb, "\n");
- if (class >= 5) {
- fqmhz = (tsc_freq + 4999) / 1000000;
- fqkhz = ((tsc_freq + 4999) / 10000) % 100;
+ freq = GET_TSC_FREQ();
+ if (class >= 5 && freq != 0) {
+ fqmhz = (freq + 4999) / 1000000;
+ fqkhz = ((freq + 4999) / 10000) % 100;
sbuf_printf(sb,
"cpu MHz\t\t: %d.%02d\n"
"bogomips\t: %d.%02d\n",
Index: sys/pc98/pc98/machdep.c
===================================================================
--- sys/pc98/pc98/machdep.c (revision 219741)
+++ sys/pc98/pc98/machdep.c (working copy)
@@ -1072,16 +1072,17 @@ int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
register_t reg;
- uint64_t tsc1, tsc2;
+ uint64_t freq, tsc1, tsc2;
if (pcpu_find(cpu_id) == NULL || rate == NULL)
return (EINVAL);
if ((cpu_feature & CPUID_TSC) == 0)
return (EOPNOTSUPP);
+ freq = GET_TSC_FREQ();
/* If we're booting, trust the rate calibrated moments ago. */
- if (cold && tsc_freq != 0) {
- *rate = tsc_freq;
+ if (cold && freq != 0) {
+ *rate = freq;
return (0);
}
@@ -1109,17 +1110,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
}
#endif
- tsc2 -= tsc1;
- if (tsc_freq != 0) {
- *rate = tsc2 * 1000;
- return (0);
- }
-
- /*
- * Subtract 0.5% of the total. Empirical testing has shown that
- * overhead in DELAY() works out to approximately this value.
- */
- *rate = tsc2 * 1000 - tsc2 * 5;
+ *rate = (tsc2 - tsc1) * 1000;
return (0);
}
Index: sys/x86/cpufreq/est.c
===================================================================
--- sys/x86/cpufreq/est.c (revision 219741)
+++ sys/x86/cpufreq/est.c (working copy)
@@ -1215,7 +1215,7 @@ est_msr_info(device_t dev, uint64_t msr, freq_info
return (EOPNOTSUPP);
/* Figure out the bus clock. */
- freq = tsc_freq / 1000000;
+ freq = GET_TSC_FREQ() / 1000000;
id = msr >> 32;
bus = freq / (id >> 8);
device_printf(dev, "Guessed bus clock (high) of %d MHz\n", bus);
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c (revision 219741)
+++ sys/x86/x86/tsc.c (working copy)
@@ -245,14 +245,16 @@ tsc_freq_changing(void *arg, const struct cf_level
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
+ uint64_t freq;
/* If there was an error during the transition, don't do anything. */
if (tsc_disabled || status != 0)
return;
/* Total setting for this level gives the new frequency in MHz. */
- tsc_freq = (uint64_t)level->total_set.freq * 1000000;
- tsc_timecounter.tc_frequency = tsc_freq;
+ freq = (uint64_t)level->total_set.freq * 1000000;
+ SET_TSC_FREQ(freq);
+ atomic_store_64(&tsc_timecounter.tc_frequency, freq);
}
static int
@@ -261,13 +263,13 @@ sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
int error;
uint64_t freq;
- if (tsc_timecounter.tc_frequency == 0)
+ freq = GET_TSC_FREQ();
+ if (freq == 0)
return (EOPNOTSUPP);
- freq = tsc_freq;
error = sysctl_handle_64(oidp, &freq, 0, req);
if (error == 0 && req->newptr != NULL) {
- tsc_freq = freq;
- tsc_timecounter.tc_frequency = tsc_freq;
+ SET_TSC_FREQ(freq);
+ atomic_store_64(&tsc_timecounter.tc_frequency, freq);
}
return (error);
}
Index: sys/x86/isa/clock.c
===================================================================
--- sys/x86/isa/clock.c (revision 219741)
+++ sys/x86/isa/clock.c (working copy)
@@ -245,40 +245,43 @@ getit(void)
return ((high << 8) | low);
}
-static __inline void
-delay_tsc(int n)
+static __inline int
+_delay(int n)
{
- uint64_t start, end, now;
-
- sched_pin();
- start = rdtsc();
- end = start + (tsc_freq * n) / 1000000;
- do {
- cpu_spinwait();
- now = rdtsc();
- } while (now < end || (now > start && end < start));
- sched_unpin();
-}
-
-static __inline void
-delay_timecounter(struct timecounter *tc, int n)
-{
- uint64_t end, now;
+ struct timecounter *tc;
+ uint64_t end, freq, now;
u_int last, mask, u;
+ int use_tsc;
- mask = tc->tc_counter_mask;
- last = tc->tc_get_timecount(tc) & mask;
- end = tc->tc_frequency * n / 1000000;
+ tc = timecounter;
+ freq = GET_TSC_FREQ();
+ use_tsc = tsc_is_invariant && freq != 0;
+ if (use_tsc) {
+ mask = ~0u;
+ sched_pin();
+ last = rdtsc();
+ } else {
+ if (tc->tc_quality <= 0)
+ return (0);
+ freq = atomic_load_64(&tc->tc_frequency);
+ mask = tc->tc_counter_mask;
+ last = tc->tc_get_timecount(tc);
+ }
+ last &= mask;
+ end = freq * n / 1000000;
now = 0;
do {
cpu_spinwait();
- u = tc->tc_get_timecount(tc) & mask;
+ u = (use_tsc ? rdtsc() : tc->tc_get_timecount(tc)) & mask;
if (u < last)
now += mask - last + u + 1;
else
now += u - last;
last = u;
} while (now < end);
+ if (use_tsc)
+ sched_unpin();
+ return (1);
}
/*
@@ -289,7 +292,6 @@ getit(void)
void
DELAY(int n)
{
- struct timecounter *tc;
int delta, prev_tick, tick, ticks_left;
#ifdef DELAYDEBUG
@@ -298,15 +300,8 @@ DELAY(int n)
static int state = 0;
#endif
- if (tsc_freq != 0) {
- delay_tsc(n);
+ if (_delay(n))
return;
- }
- tc = timecounter;
- if (tc->tc_quality > 0) {
- delay_timecounter(tc, n);
- return;
- }
#ifdef DELAYDEBUG
if (state == 0) {
state = 1;
Index: sys/i386/include/clock.h
===================================================================
--- sys/i386/include/clock.h (revision 219741)
+++ sys/i386/include/clock.h (working copy)
@@ -20,6 +20,9 @@ extern int i8254_max_count;
extern uint64_t tsc_freq;
extern int tsc_is_invariant;
+#define GET_TSC_FREQ() atomic_load_64(&tsc_freq)
+#define SET_TSC_FREQ(f) atomic_store_64(&tsc_freq, (f))
+
void i8254_init(void);
/*
Index: sys/i386/include/atomic.h
===================================================================
--- sys/i386/include/atomic.h (revision 219741)
+++ sys/i386/include/atomic.h (working copy)
@@ -120,6 +120,76 @@ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p,
} \
struct __hack
+#if defined(_KERNEL) && !defined(WANT_FUNCTIONS)
+
+/* I486 does not support SMP or CMPXCHG8B. */
+static __inline uint64_t
+atomic_load_64_i386(uint64_t *p)
+{
+ uint64_t v;
+
+ __asm __volatile(
+ " pushfl ; "
+ " cli ; "
+ " movl (%1),%%eax ; "
+ " movl 4(%1),%%edx ; "
+ " popfl"
+ : "=A" (v) /* 0 */
+ : "c" (p)); /* 1 */
+ return (v);
+}
+
+static __inline void
+atomic_store_64_i386(uint64_t *p, uint64_t v)
+{
+
+ __asm __volatile(
+ " pushfl ; "
+ " cli ; "
+ " movl %%eax,(%0) ; "
+ " movl %%edx,4(%0) ; "
+ " popfl"
+ :
+ : "r" (p), /* 0 */
+ "A" (v) /* 1 */
+ : "memory");
+}
+
+/* For Pentium and above, use CMPXCHG8B to emulate MOVQ. */
+static __inline uint64_t
+atomic_load_64_i586(uint64_t *p)
+{
+ uint64_t v;
+
+ __asm __volatile(
+ " movl %%ebx,%%eax ; "
+ " movl %%ecx,%%edx ; "
+ " " MPLOCKED " "
+ " cmpxchg8b (%1)"
+ : "=A" (v) /* 0 */
+ : "c" (p) /* 1 */
+ : "cc");
+ return (v);
+}
+
+static __inline void
+atomic_store_64_i586(uint64_t *p, uint64_t v)
+{
+
+ __asm __volatile(
+ " movl %%eax,%%ebx ; "
+ " movl %%edx,%%ecx ; "
+ "1: "
+ " cmpxchg8b (%0) ; "
+ " jne 1b"
+ :
+ : "r" (p), /* 0 */
+ "A" (v) /* 1 */
+ : "ebx", "ecx", "memory", "cc");
+}
+
+#endif /* _KERNEL && !WANT_FUNCTIONS */
+
/*
* Atomic compare and set, used by the mutex functions
*
@@ -292,6 +362,11 @@ ATOMIC_STORE_LOAD(long, "cmpxchgl %0,%1", "xchgl
#ifndef WANT_FUNCTIONS
+#ifdef _KERNEL
+extern uint64_t (*atomic_load_64)(uint64_t *);
+extern void (*atomic_store_64)(uint64_t *, uint64_t);
+#endif
+
static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{
Index: sys/i386/include/cpu.h
===================================================================
--- sys/i386/include/cpu.h (revision 219741)
+++ sys/i386/include/cpu.h (working copy)
@@ -39,7 +39,6 @@
/*
* Definitions unique to i386 cpu support.
*/
-#include <machine/cputypes.h>
#include <machine/psl.h>
#include <machine/frame.h>
#include <machine/segments.h>
@@ -70,13 +69,8 @@ void swi_vm(void *);
static __inline uint64_t
get_cyclecount(void)
{
- struct bintime bt;
- if (cpu_class == CPUCLASS_486) {
- binuptime(&bt);
- return ((uint64_t)bt.sec << 56 | bt.frac >> 8);
- }
- return (rdtsc());
+ return (cpu_ticks());
}
#endif
Index: sys/i386/i386/legacy.c
===================================================================
--- sys/i386/i386/legacy.c (revision 219741)
+++ sys/i386/i386/legacy.c (working copy)
@@ -342,7 +342,7 @@ cpu_read_ivar(device_t dev, device_t child, int in
break;
case CPU_IVAR_NOMINAL_MHZ:
if (tsc_is_invariant) {
- *result = (uintptr_t)(tsc_freq / 1000000);
+ *result = (uintptr_t)(GET_TSC_FREQ() / 1000000);
break;
}
/* FALLTHROUGH */
Index: sys/i386/i386/perfmon.c
===================================================================
--- sys/i386/i386/perfmon.c (revision 219741)
+++ sys/i386/i386/perfmon.c (working copy)
@@ -336,6 +336,7 @@ perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_
struct pmc *pmc;
struct pmc_data *pmcd;
struct pmc_tstamp *pmct;
+ uint64_t freq;
int *ip;
int rv;
@@ -386,13 +387,14 @@ perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_
break;
case PMIOTSTAMP:
- if (!tsc_freq) {
+ freq = GET_TSC_FREQ();
+ if (freq == 0) {
rv = ENOTTY;
break;
}
pmct = (struct pmc_tstamp *)param;
/* XXX interface loses precision. */
- pmct->pmct_rate = tsc_freq / 1000000;
+ pmct->pmct_rate = freq / 1000000;
pmct->pmct_value = rdtsc();
rv = 0;
break;
Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c (revision 219741)
+++ sys/i386/i386/machdep.c (working copy)
@@ -1137,20 +1137,21 @@ int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
register_t reg;
- uint64_t tsc1, tsc2;
+ uint64_t freq, tsc1, tsc2;
if (pcpu_find(cpu_id) == NULL || rate == NULL)
return (EINVAL);
if ((cpu_feature & CPUID_TSC) == 0)
return (EOPNOTSUPP);
+ freq = GET_TSC_FREQ();
/* If TSC is P-state invariant, DELAY(9) based logic fails. */
- if (tsc_is_invariant && tsc_freq != 0)
+ if (tsc_is_invariant && freq != 0)
return (EOPNOTSUPP);
/* If we're booting, trust the rate calibrated moments ago. */
- if (cold && tsc_freq != 0) {
- *rate = tsc_freq;
+ if (cold && freq != 0) {
+ *rate = freq;
return (0);
}
@@ -1178,17 +1179,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
}
#endif
- tsc2 -= tsc1;
- if (tsc_freq != 0) {
- *rate = tsc2 * 1000;
- return (0);
- }
-
- /*
- * Subtract 0.5% of the total. Empirical testing has shown that
- * overhead in DELAY() works out to approximately this value.
- */
- *rate = tsc2 * 1000 - tsc2 * 5;
+ *rate = (tsc2 - tsc1) * 1000;
return (0);
}
@@ -1419,6 +1410,19 @@ cpu_idle_wakeup(int cpu)
return (1);
}
+uint64_t (*atomic_load_64)(uint64_t *) = atomic_load_64_i386;
+void (*atomic_store_64)(uint64_t *, uint64_t) = atomic_store_64_i386;
+
+static void
+cpu_probe_cx8(void)
+{
+
+ if ((cpu_feature & CPUID_CX8) != 0) {
+ atomic_load_64 = atomic_load_64_i586;
+ atomic_store_64 = atomic_store_64_i586;
+ }
+}
+
/*
* Ordered by speed/power consumption.
*/
@@ -2730,6 +2734,7 @@ init386(first)
thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
cpu_probe_amdc1e();
+ cpu_probe_cx8();
}
#else
@@ -3006,6 +3011,7 @@ init386(first)
thread0.td_frame = &proc0_tf;
cpu_probe_amdc1e();
+ cpu_probe_cx8();
}
#endif
Index: sys/contrib/altq/altq/altq_subr.c
===================================================================
--- sys/contrib/altq/altq/altq_subr.c (revision 219741)
+++ sys/contrib/altq/altq/altq_subr.c (working copy)
@@ -929,7 +929,7 @@ init_machclk_setup(void)
#if defined(__amd64__) || defined(__i386__)
/* check if TSC is available */
#ifdef __FreeBSD__
- if ((cpu_feature & CPUID_TSC) == 0 || tsc_freq == 0)
+ if ((cpu_feature & CPUID_TSC) == 0 || GET_TSC_FREQ() == 0)
#else
if ((cpu_feature & CPUID_TSC) == 0)
#endif
@@ -964,7 +964,7 @@ init_machclk(void)
*/
#if defined(__amd64__) || defined(__i386__)
#ifdef __FreeBSD__
- machclk_freq = tsc_freq;
+ machclk_freq = GET_TSC_FREQ();
#elif defined(__NetBSD__)
machclk_freq = (u_int32_t)cpu_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
Index: sys/cddl/dev/dtrace/i386/dtrace_subr.c
===================================================================
--- sys/cddl/dev/dtrace/i386/dtrace_subr.c (revision 219741)
+++ sys/cddl/dev/dtrace/i386/dtrace_subr.c (working copy)
@@ -403,7 +403,7 @@ dtrace_gethrtime_init(void *arg)
* Otherwise tick->time conversion will be inaccurate, but
* will preserve monotonic property of TSC.
*/
- tsc_f = tsc_freq;
+ tsc_f = GET_TSC_FREQ();
/*
* The following line checks that nsec_scale calculated below
Index: sys/cddl/dev/dtrace/amd64/dtrace_subr.c
===================================================================
--- sys/cddl/dev/dtrace/amd64/dtrace_subr.c (revision 219741)
+++ sys/cddl/dev/dtrace/amd64/dtrace_subr.c (working copy)
@@ -403,7 +403,7 @@ dtrace_gethrtime_init(void *arg)
* Otherwise tick->time conversion will be inaccurate, but
* will preserve monotonic property of TSC.
*/
- tsc_f = tsc_freq;
+ tsc_f = GET_TSC_FREQ();
/*
* The following line checks that nsec_scale calculated below
Index: sys/amd64/include/clock.h
===================================================================
--- sys/amd64/include/clock.h (revision 219741)
+++ sys/amd64/include/clock.h (working copy)
@@ -20,6 +20,9 @@ extern int i8254_max_count;
extern uint64_t tsc_freq;
extern int tsc_is_invariant;
+#define GET_TSC_FREQ() atomic_load_64(&tsc_freq)
+#define SET_TSC_FREQ(f) atomic_store_64(&tsc_freq, (f))
+
void i8254_init(void);
/*
Index: sys/amd64/include/atomic.h
===================================================================
--- sys/amd64/include/atomic.h (revision 219741)
+++ sys/amd64/include/atomic.h (working copy)
@@ -303,6 +303,11 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq
#ifndef WANT_FUNCTIONS
+#ifdef _KERNEL
+#define atomic_load_64(p) (*(p))
+#define atomic_store_64(p, v) do { *(p) = (v); } while (0)
+#endif
+
/* Read the current value and store a zero in the destination. */
#ifdef __GNUCLIKE_ASM
Index: sys/amd64/amd64/legacy.c
===================================================================
--- sys/amd64/amd64/legacy.c (revision 219741)
+++ sys/amd64/amd64/legacy.c (working copy)
@@ -321,7 +321,7 @@ cpu_read_ivar(device_t dev, device_t child, int in
break;
case CPU_IVAR_NOMINAL_MHZ:
if (tsc_is_invariant) {
- *result = (uintptr_t)(tsc_freq / 1000000);
+ *result = (uintptr_t)(GET_TSC_FREQ() / 1000000);
break;
}
/* FALLTHROUGH */
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c (revision 219741)
+++ sys/amd64/amd64/machdep.c (working copy)
@@ -541,18 +541,19 @@ int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
register_t reg;
- uint64_t tsc1, tsc2;
+ uint64_t freq, tsc1, tsc2;
if (pcpu_find(cpu_id) == NULL || rate == NULL)
return (EINVAL);
+ freq = GET_TSC_FREQ();
/* If TSC is P-state invariant, DELAY(9) based logic fails. */
- if (tsc_is_invariant && tsc_freq != 0)
+ if (tsc_is_invariant && freq != 0)
return (EOPNOTSUPP);
/* If we're booting, trust the rate calibrated moments ago. */
- if (cold && tsc_freq != 0) {
- *rate = tsc_freq;
+ if (cold && freq != 0) {
+ *rate = freq;
return (0);
}
@@ -580,17 +581,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
}
#endif
- tsc2 -= tsc1;
- if (tsc_freq != 0) {
- *rate = tsc2 * 1000;
- return (0);
- }
-
- /*
- * Subtract 0.5% of the total. Empirical testing has shown that
- * overhead in DELAY() works out to approximately this value.
- */
- *rate = tsc2 * 1000 - tsc2 * 5;
+ *rate = (tsc2 - tsc1) * 1000;
return (0);
}