Move x86 idle code to the x86/ common place.
Konstantin Belousov
kostikbel at gmail.com
Mon Apr 20 16:21:59 UTC 2015
Below is a patch which unifies some code from
sys/{amd64/amd64,i386/i386}/machdep.c into the new shared file
sys/x86/x86/cpu_machdep.c. Most of the code is related to handling
the idle CPU state, but there are also some additional trivialities,
such as cpu_boot().
The move is mostly preparation for some other changes to the idle
infrastructure. I did not want to make the same changes twice.
"make universe" passed with the patch, and I successfully booted a debug
amd64 kernel and a UP i386 kernel.
Comments?
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 4c20e4f..3230937 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -578,375 +578,6 @@ freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
}
#endif
-
-/*
- * Machine dependent boot() routine
- *
- * I haven't seen anything to put here yet
- * Possibly some stuff might be grafted back here from boot()
- */
-void
-cpu_boot(int howto)
-{
-}
-
-/*
- * Flush the D-cache for non-DMA I/O so that the I-cache can
- * be made coherent later.
- */
-void
-cpu_flush_dcache(void *ptr, size_t len)
-{
- /* Not applicable */
-}
-
-/* Get current clock frequency for the given cpu id. */
-int
-cpu_est_clockrate(int cpu_id, uint64_t *rate)
-{
- uint64_t tsc1, tsc2;
- uint64_t acnt, mcnt, perf;
- register_t reg;
-
- if (pcpu_find(cpu_id) == NULL || rate == NULL)
- return (EINVAL);
-
- /*
- * If TSC is P-state invariant and APERF/MPERF MSRs do not exist,
- * DELAY(9) based logic fails.
- */
- if (tsc_is_invariant && !tsc_perf_stat)
- return (EOPNOTSUPP);
-
-#ifdef SMP
- if (smp_cpus > 1) {
- /* Schedule ourselves on the indicated cpu. */
- thread_lock(curthread);
- sched_bind(curthread, cpu_id);
- thread_unlock(curthread);
- }
-#endif
-
- /* Calibrate by measuring a short delay. */
- reg = intr_disable();
- if (tsc_is_invariant) {
- wrmsr(MSR_MPERF, 0);
- wrmsr(MSR_APERF, 0);
- tsc1 = rdtsc();
- DELAY(1000);
- mcnt = rdmsr(MSR_MPERF);
- acnt = rdmsr(MSR_APERF);
- tsc2 = rdtsc();
- intr_restore(reg);
- perf = 1000 * acnt / mcnt;
- *rate = (tsc2 - tsc1) * perf;
- } else {
- tsc1 = rdtsc();
- DELAY(1000);
- tsc2 = rdtsc();
- intr_restore(reg);
- *rate = (tsc2 - tsc1) * 1000;
- }
-
-#ifdef SMP
- if (smp_cpus > 1) {
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- }
-#endif
-
- return (0);
-}
-
-/*
- * Shutdown the CPU as much as possible
- */
-void
-cpu_halt(void)
-{
- for (;;)
- halt();
-}
-
-void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
-static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
-static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
-SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait,
- 0, "Use MONITOR/MWAIT for short idle");
-
-#define STATE_RUNNING 0x0
-#define STATE_MWAIT 0x1
-#define STATE_SLEEPING 0x2
-
-static void
-cpu_idle_acpi(sbintime_t sbt)
-{
- int *state;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_SLEEPING;
-
- /* See comments in cpu_idle_hlt(). */
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook(sbt);
- else
- __asm __volatile("sti; hlt");
- *state = STATE_RUNNING;
-}
-
-static void
-cpu_idle_hlt(sbintime_t sbt)
-{
- int *state;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_SLEEPING;
-
- /*
- * Since we may be in a critical section from cpu_idle(), if
- * an interrupt fires during that critical section we may have
- * a pending preemption. If the CPU halts, then that thread
- * may not execute until a later interrupt awakens the CPU.
- * To handle this race, check for a runnable thread after
- * disabling interrupts and immediately return if one is
- * found. Also, we must absolutely guarentee that hlt is
- * the next instruction after sti. This ensures that any
- * interrupt that fires after the call to disable_intr() will
- * immediately awaken the CPU from hlt. Finally, please note
- * that on x86 this works fine because of interrupts enabled only
- * after the instruction following sti takes place, while IF is set
- * to 1 immediately, allowing hlt instruction to acknowledge the
- * interrupt.
- */
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else
- __asm __volatile("sti; hlt");
- *state = STATE_RUNNING;
-}
-
-static void
-cpu_idle_mwait(sbintime_t sbt)
-{
- int *state;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_MWAIT;
-
- /* See comments in cpu_idle_hlt(). */
- disable_intr();
- if (sched_runnable()) {
- enable_intr();
- *state = STATE_RUNNING;
- return;
- }
- cpu_monitor(state, 0, 0);
- if (*state == STATE_MWAIT)
- __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
- else
- enable_intr();
- *state = STATE_RUNNING;
-}
-
-static void
-cpu_idle_spin(sbintime_t sbt)
-{
- int *state;
- int i;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_RUNNING;
-
- /*
- * The sched_runnable() call is racy but as long as there is
- * a loop missing it one time will have just a little impact if any
- * (and it is much better than missing the check at all).
- */
- for (i = 0; i < 1000; i++) {
- if (sched_runnable())
- return;
- cpu_spinwait();
- }
-}
-
-/*
- * C1E renders the local APIC timer dead, so we disable it by
- * reading the Interrupt Pending Message register and clearing
- * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
- *
- * Reference:
- * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
- * #32559 revision 3.00+
- */
-#define MSR_AMDK8_IPM 0xc0010055
-#define AMDK8_SMIONCMPHALT (1ULL << 27)
-#define AMDK8_C1EONCMPHALT (1ULL << 28)
-#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
-
-static void
-cpu_probe_amdc1e(void)
-{
-
- /*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- }
-}
-
-void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
-
-void
-cpu_idle(int busy)
-{
- uint64_t msr;
- sbintime_t sbt = -1;
-
- CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
- busy, curcpu);
-#ifdef MP_WATCHDOG
- ap_watchdog(PCPU_GET(cpuid));
-#endif
- /* If we are busy - try to use fast methods. */
- if (busy) {
- if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
- cpu_idle_mwait(busy);
- goto out;
- }
- }
-
- /* If we have time - switch timers into idle mode. */
- if (!busy) {
- critical_enter();
- sbt = cpu_idleclock();
- }
-
- /* Apply AMD APIC timer C1E workaround. */
- if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
- }
-
- /* Call main idle method. */
- cpu_idle_fn(sbt);
-
- /* Switch timers back into active mode. */
- if (!busy) {
- cpu_activeclock();
- critical_exit();
- }
-out:
- CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
- busy, curcpu);
-}
-
-int
-cpu_idle_wakeup(int cpu)
-{
- struct pcpu *pcpu;
- int *state;
-
- pcpu = pcpu_find(cpu);
- state = (int *)pcpu->pc_monitorbuf;
- /*
- * This doesn't need to be atomic since missing the race will
- * simply result in unnecessary IPIs.
- */
- if (*state == STATE_SLEEPING)
- return (0);
- if (*state == STATE_MWAIT)
- *state = STATE_RUNNING;
- return (1);
-}
-
-/*
- * Ordered by speed/power consumption.
- */
-struct {
- void *id_fn;
- char *id_name;
-} idle_tbl[] = {
- { cpu_idle_spin, "spin" },
- { cpu_idle_mwait, "mwait" },
- { cpu_idle_hlt, "hlt" },
- { cpu_idle_acpi, "acpi" },
- { NULL, NULL }
-};
-
-static int
-idle_sysctl_available(SYSCTL_HANDLER_ARGS)
-{
- char *avail, *p;
- int error;
- int i;
-
- avail = malloc(256, M_TEMP, M_WAITOK);
- p = avail;
- for (i = 0; idle_tbl[i].id_name != NULL; i++) {
- if (strstr(idle_tbl[i].id_name, "mwait") &&
- (cpu_feature2 & CPUID2_MON) == 0)
- continue;
- if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
- cpu_idle_hook == NULL)
- continue;
- p += sprintf(p, "%s%s", p != avail ? ", " : "",
- idle_tbl[i].id_name);
- }
- error = sysctl_handle_string(oidp, avail, 0, req);
- free(avail, M_TEMP);
- return (error);
-}
-
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
-static int
-idle_sysctl(SYSCTL_HANDLER_ARGS)
-{
- char buf[16];
- int error;
- char *p;
- int i;
-
- p = "unknown";
- for (i = 0; idle_tbl[i].id_name != NULL; i++) {
- if (idle_tbl[i].id_fn == cpu_idle_fn) {
- p = idle_tbl[i].id_name;
- break;
- }
- }
- strncpy(buf, p, sizeof(buf));
- error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
- if (error != 0 || req->newptr == NULL)
- return (error);
- for (i = 0; idle_tbl[i].id_name != NULL; i++) {
- if (strstr(idle_tbl[i].id_name, "mwait") &&
- (cpu_feature2 & CPUID2_MON) == 0)
- continue;
- if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
- cpu_idle_hook == NULL)
- continue;
- if (strcmp(idle_tbl[i].id_name, buf))
- continue;
- cpu_idle_fn = idle_tbl[i].id_fn;
- return (0);
- }
- return (EINVAL);
-}
-
-SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
- idle_sysctl, "A", "currently selected idle function");
-
/*
* Reset registers to default values on exec.
*/
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index ccde0e3..9083421 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -91,6 +91,7 @@ struct dumperinfo;
void *alloc_fpusave(int flags);
void amd64_syscall(struct thread *td, int traced);
void busdma_swi(void);
+void cpu_probe_amdc1e(void);
void cpu_setregs(void);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 4910903..ae71c39 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -558,6 +558,7 @@ x86/pci/pci_bus.c optional pci
x86/pci/qpi.c optional pci
x86/x86/busdma_bounce.c standard
x86/x86/busdma_machdep.c standard
+x86/x86/cpu_machdep.c standard
x86/x86/dump_machdep.c standard
x86/x86/fdt_machdep.c optional fdt
x86/x86/identcpu.c standard
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 1873514..f072247 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -576,6 +576,7 @@ x86/pci/pci_bus.c optional pci
x86/pci/qpi.c optional pci
x86/x86/busdma_bounce.c standard
x86/x86/busdma_machdep.c standard
+x86/x86/cpu_machdep.c standard
x86/x86/dump_machdep.c standard
x86/x86/fdt_machdep.c optional fdt
x86/x86/identcpu.c standard
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index be67ce4..f95d0bb 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -248,6 +248,7 @@ x86/isa/isa.c optional isa
x86/pci/pci_bus.c optional pci
x86/x86/busdma_bounce.c standard
x86/x86/busdma_machdep.c standard
+x86/x86/cpu_machdep.c standard
x86/x86/dump_machdep.c standard
x86/x86/identcpu.c standard
x86/x86/intr_machdep.c standard
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 123db4e..72f7685 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1176,427 +1176,6 @@ sys_sigreturn(td, uap)
}
/*
- * Machine dependent boot() routine
- *
- * I haven't seen anything to put here yet
- * Possibly some stuff might be grafted back here from boot()
- */
-void
-cpu_boot(int howto)
-{
-}
-
-/*
- * Flush the D-cache for non-DMA I/O so that the I-cache can
- * be made coherent later.
- */
-void
-cpu_flush_dcache(void *ptr, size_t len)
-{
- /* Not applicable */
-}
-
-/* Get current clock frequency for the given cpu id. */
-int
-cpu_est_clockrate(int cpu_id, uint64_t *rate)
-{
- uint64_t tsc1, tsc2;
- uint64_t acnt, mcnt, perf;
- register_t reg;
-
- if (pcpu_find(cpu_id) == NULL || rate == NULL)
- return (EINVAL);
- if ((cpu_feature & CPUID_TSC) == 0)
- return (EOPNOTSUPP);
-
- /*
- * If TSC is P-state invariant and APERF/MPERF MSRs do not exist,
- * DELAY(9) based logic fails.
- */
- if (tsc_is_invariant && !tsc_perf_stat)
- return (EOPNOTSUPP);
-
-#ifdef SMP
- if (smp_cpus > 1) {
- /* Schedule ourselves on the indicated cpu. */
- thread_lock(curthread);
- sched_bind(curthread, cpu_id);
- thread_unlock(curthread);
- }
-#endif
-
- /* Calibrate by measuring a short delay. */
- reg = intr_disable();
- if (tsc_is_invariant) {
- wrmsr(MSR_MPERF, 0);
- wrmsr(MSR_APERF, 0);
- tsc1 = rdtsc();
- DELAY(1000);
- mcnt = rdmsr(MSR_MPERF);
- acnt = rdmsr(MSR_APERF);
- tsc2 = rdtsc();
- intr_restore(reg);
- perf = 1000 * acnt / mcnt;
- *rate = (tsc2 - tsc1) * perf;
- } else {
- tsc1 = rdtsc();
- DELAY(1000);
- tsc2 = rdtsc();
- intr_restore(reg);
- *rate = (tsc2 - tsc1) * 1000;
- }
-
-#ifdef SMP
- if (smp_cpus > 1) {
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- }
-#endif
-
- return (0);
-}
-
-#ifdef XEN
-
-static void
-idle_block(void)
-{
-
- HYPERVISOR_sched_op(SCHEDOP_block, 0);
-}
-
-void
-cpu_halt(void)
-{
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
-
-int scheduler_running;
-
-static void
-cpu_idle_hlt(sbintime_t sbt)
-{
-
- scheduler_running = 1;
- enable_intr();
- idle_block();
-}
-
-#else
-/*
- * Shutdown the CPU as much as possible
- */
-void
-cpu_halt(void)
-{
- for (;;)
- halt();
-}
-
-#endif
-
-void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
-static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
-static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
-SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait,
- 0, "Use MONITOR/MWAIT for short idle");
-
-#define STATE_RUNNING 0x0
-#define STATE_MWAIT 0x1
-#define STATE_SLEEPING 0x2
-
-#ifndef PC98
-static void
-cpu_idle_acpi(sbintime_t sbt)
-{
- int *state;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_SLEEPING;
-
- /* See comments in cpu_idle_hlt(). */
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook(sbt);
- else
- __asm __volatile("sti; hlt");
- *state = STATE_RUNNING;
-}
-#endif /* !PC98 */
-
-#ifndef XEN
-static void
-cpu_idle_hlt(sbintime_t sbt)
-{
- int *state;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_SLEEPING;
-
- /*
- * Since we may be in a critical section from cpu_idle(), if
- * an interrupt fires during that critical section we may have
- * a pending preemption. If the CPU halts, then that thread
- * may not execute until a later interrupt awakens the CPU.
- * To handle this race, check for a runnable thread after
- * disabling interrupts and immediately return if one is
- * found. Also, we must absolutely guarentee that hlt is
- * the next instruction after sti. This ensures that any
- * interrupt that fires after the call to disable_intr() will
- * immediately awaken the CPU from hlt. Finally, please note
- * that on x86 this works fine because of interrupts enabled only
- * after the instruction following sti takes place, while IF is set
- * to 1 immediately, allowing hlt instruction to acknowledge the
- * interrupt.
- */
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else
- __asm __volatile("sti; hlt");
- *state = STATE_RUNNING;
-}
-#endif
-
-static void
-cpu_idle_mwait(sbintime_t sbt)
-{
- int *state;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_MWAIT;
-
- /* See comments in cpu_idle_hlt(). */
- disable_intr();
- if (sched_runnable()) {
- enable_intr();
- *state = STATE_RUNNING;
- return;
- }
- cpu_monitor(state, 0, 0);
- if (*state == STATE_MWAIT)
- __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
- else
- enable_intr();
- *state = STATE_RUNNING;
-}
-
-static void
-cpu_idle_spin(sbintime_t sbt)
-{
- int *state;
- int i;
-
- state = (int *)PCPU_PTR(monitorbuf);
- *state = STATE_RUNNING;
-
- /*
- * The sched_runnable() call is racy but as long as there is
- * a loop missing it one time will have just a little impact if any
- * (and it is much better than missing the check at all).
- */
- for (i = 0; i < 1000; i++) {
- if (sched_runnable())
- return;
- cpu_spinwait();
- }
-}
-
-/*
- * C1E renders the local APIC timer dead, so we disable it by
- * reading the Interrupt Pending Message register and clearing
- * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
- *
- * Reference:
- * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
- * #32559 revision 3.00+
- */
-#define MSR_AMDK8_IPM 0xc0010055
-#define AMDK8_SMIONCMPHALT (1ULL << 27)
-#define AMDK8_C1EONCMPHALT (1ULL << 28)
-#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
-
-static void
-cpu_probe_amdc1e(void)
-{
-
- /*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- }
-}
-
-#if defined(PC98) || defined(XEN)
-void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt;
-#else
-void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
-#endif
-
-void
-cpu_idle(int busy)
-{
-#ifndef XEN
- uint64_t msr;
-#endif
- sbintime_t sbt = -1;
-
- CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
- busy, curcpu);
-#if defined(MP_WATCHDOG) && !defined(XEN)
- ap_watchdog(PCPU_GET(cpuid));
-#endif
-#ifndef XEN
- /* If we are busy - try to use fast methods. */
- if (busy) {
- if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
- cpu_idle_mwait(busy);
- goto out;
- }
- }
-#endif
-
- /* If we have time - switch timers into idle mode. */
- if (!busy) {
- critical_enter();
- sbt = cpu_idleclock();
- }
-
-#ifndef XEN
- /* Apply AMD APIC timer C1E workaround. */
- if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
- }
-#endif
-
- /* Call main idle method. */
- cpu_idle_fn(sbt);
-
- /* Switch timers back into active mode. */
- if (!busy) {
- cpu_activeclock();
- critical_exit();
- }
-#ifndef XEN
-out:
-#endif
- CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
- busy, curcpu);
-}
-
-int
-cpu_idle_wakeup(int cpu)
-{
- struct pcpu *pcpu;
- int *state;
-
- pcpu = pcpu_find(cpu);
- state = (int *)pcpu->pc_monitorbuf;
- /*
- * This doesn't need to be atomic since missing the race will
- * simply result in unnecessary IPIs.
- */
- if (*state == STATE_SLEEPING)
- return (0);
- if (*state == STATE_MWAIT)
- *state = STATE_RUNNING;
- return (1);
-}
-
-/*
- * Ordered by speed/power consumption.
- */
-struct {
- void *id_fn;
- char *id_name;
-} idle_tbl[] = {
- { cpu_idle_spin, "spin" },
- { cpu_idle_mwait, "mwait" },
- { cpu_idle_hlt, "hlt" },
-#ifndef PC98
- { cpu_idle_acpi, "acpi" },
-#endif
- { NULL, NULL }
-};
-
-static int
-idle_sysctl_available(SYSCTL_HANDLER_ARGS)
-{
- char *avail, *p;
- int error;
- int i;
-
- avail = malloc(256, M_TEMP, M_WAITOK);
- p = avail;
- for (i = 0; idle_tbl[i].id_name != NULL; i++) {
- if (strstr(idle_tbl[i].id_name, "mwait") &&
- (cpu_feature2 & CPUID2_MON) == 0)
- continue;
-#ifndef PC98
- if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
- cpu_idle_hook == NULL)
- continue;
-#endif
- p += sprintf(p, "%s%s", p != avail ? ", " : "",
- idle_tbl[i].id_name);
- }
- error = sysctl_handle_string(oidp, avail, 0, req);
- free(avail, M_TEMP);
- return (error);
-}
-
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
-static int
-idle_sysctl(SYSCTL_HANDLER_ARGS)
-{
- char buf[16];
- int error;
- char *p;
- int i;
-
- p = "unknown";
- for (i = 0; idle_tbl[i].id_name != NULL; i++) {
- if (idle_tbl[i].id_fn == cpu_idle_fn) {
- p = idle_tbl[i].id_name;
- break;
- }
- }
- strncpy(buf, p, sizeof(buf));
- error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
- if (error != 0 || req->newptr == NULL)
- return (error);
- for (i = 0; idle_tbl[i].id_name != NULL; i++) {
- if (strstr(idle_tbl[i].id_name, "mwait") &&
- (cpu_feature2 & CPUID2_MON) == 0)
- continue;
-#ifndef PC98
- if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
- cpu_idle_hook == NULL)
- continue;
-#endif
- if (strcmp(idle_tbl[i].id_name, buf))
- continue;
- cpu_idle_fn = idle_tbl[i].id_fn;
- return (0);
- }
- return (EINVAL);
-}
-
-SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
- idle_sysctl, "A", "currently selected idle function");
-
-/*
* Reset registers to default values on exec.
*/
void
diff --git a/sys/i386/include/md_var.h b/sys/i386/include/md_var.h
index 339dff3..bffdd57 100644
--- a/sys/i386/include/md_var.h
+++ b/sys/i386/include/md_var.h
@@ -97,6 +97,7 @@ struct dumperinfo;
void *alloc_fpusave(int flags);
void bcopyb(const void *from, void *to, size_t len);
void busdma_swi(void);
+void cpu_probe_amdc1e(void);
void cpu_setregs(void);
void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs));
void doreti_iret(void) __asm(__STRING(doreti_iret));
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
new file mode 100644
index 0000000..846a123
--- /dev/null
+++ b/sys/x86/x86/cpu_machdep.c
@@ -0,0 +1,533 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1992 Terrence R. Lambert.
+ * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_atpic.h"
+#include "opt_compat.h"
+#include "opt_cpu.h"
+#include "opt_ddb.h"
+#include "opt_inet.h"
+#include "opt_isa.h"
+#include "opt_kstack_pages.h"
+#include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
+#include "opt_perfmon.h"
+#include "opt_platform.h"
+#ifdef __i386__
+#include "opt_npx.h"
+#include "opt_apic.h"
+#include "opt_xbox.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#ifdef SMP
+#include <sys/smp.h>
+#endif
+#include <sys/sysctl.h>
+
+#include <machine/clock.h>
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/specialreg.h>
+#include <machine/md_var.h>
+#include <machine/mp_watchdog.h>
+#ifdef PERFMON
+#include <machine/perfmon.h>
+#endif
+#include <machine/tss.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_param.h>
+
+#ifdef XEN
+/* XEN includes */
+#include <xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/xen_intr.h>
+#endif
+
+/*
+ * Machine dependent boot() routine
+ *
+ * I haven't seen anything to put here yet
+ * Possibly some stuff might be grafted back here from boot()
+ *
+ * The body is currently empty: the howto argument is ignored and the
+ * call has no effect.
+ */
+void
+cpu_boot(int howto)
+{
+}
+
+/*
+ * Flush the D-cache for non-DMA I/O so that the I-cache can
+ * be made coherent later.
+ *
+ * This implementation is a no-op: both ptr and len are ignored.
+ */
+void
+cpu_flush_dcache(void *ptr, size_t len)
+{
+ /* Not applicable */
+}
+
+/*
+ * Get current clock frequency for the given cpu id.
+ *
+ * cpu_id: CPU to measure; it must be known to pcpu_find().
+ * rate:   out parameter.  On success it receives the TSC delta seen
+ *         across DELAY(1000), scaled by 1000 (and, when the TSC is
+ *         invariant, by the APERF/MPERF ratio).  It is left untouched
+ *         on failure.
+ *
+ * Returns 0 on success, EINVAL for an unknown CPU or a NULL rate
+ * pointer, and EOPNOTSUPP when no usable measurement can be made:
+ * no TSC (i386 only), an invariant TSC without tsc_perf_stat, or an
+ * MPERF counter that did not advance during the delay.
+ */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+	uint64_t tsc1, tsc2;
+	uint64_t acnt, mcnt, perf;
+	register_t reg;
+	int error;
+
+	if (pcpu_find(cpu_id) == NULL || rate == NULL)
+		return (EINVAL);
+#ifdef __i386__
+	if ((cpu_feature & CPUID_TSC) == 0)
+		return (EOPNOTSUPP);
+#endif
+
+	/*
+	 * If TSC is P-state invariant and APERF/MPERF MSRs do not exist,
+	 * DELAY(9) based logic fails.
+	 */
+	if (tsc_is_invariant && !tsc_perf_stat)
+		return (EOPNOTSUPP);
+
+#ifdef SMP
+	if (smp_cpus > 1) {
+		/* Schedule ourselves on the indicated cpu. */
+		thread_lock(curthread);
+		sched_bind(curthread, cpu_id);
+		thread_unlock(curthread);
+	}
+#endif
+
+	/* Calibrate by measuring a short delay. */
+	error = 0;
+	reg = intr_disable();
+	if (tsc_is_invariant) {
+		wrmsr(MSR_MPERF, 0);
+		wrmsr(MSR_APERF, 0);
+		tsc1 = rdtsc();
+		DELAY(1000);
+		mcnt = rdmsr(MSR_MPERF);
+		acnt = rdmsr(MSR_APERF);
+		tsc2 = rdtsc();
+		intr_restore(reg);
+		if (mcnt == 0) {
+			/*
+			 * MPERF did not advance; dividing by it would
+			 * fault.  Report failure instead, but still fall
+			 * through so the thread is unbound below.
+			 */
+			error = EOPNOTSUPP;
+		} else {
+			perf = 1000 * acnt / mcnt;
+			*rate = (tsc2 - tsc1) * perf;
+		}
+	} else {
+		tsc1 = rdtsc();
+		DELAY(1000);
+		tsc2 = rdtsc();
+		intr_restore(reg);
+		*rate = (tsc2 - tsc1) * 1000;
+	}
+
+#ifdef SMP
+	if (smp_cpus > 1) {
+		/* Undo the binding taken above on every exit path. */
+		thread_lock(curthread);
+		sched_unbind(curthread);
+		thread_unlock(curthread);
+	}
+#endif
+
+	return (error);
+}
+
+#if defined(__i386__) && defined(XEN)
+
+/*
+ * Xen (i386) idle helper: issue the SCHEDOP_block scheduler hypercall.
+ * NOTE(review): presumably this gives the virtual CPU back to the
+ * hypervisor until an event is pending -- confirm against the Xen
+ * sched_op interface.
+ */
+static void
+idle_block(void)
+{
+
+ HYPERVISOR_sched_op(SCHEDOP_block, 0);
+}
+
+/*
+ * Xen (i386) variant of cpu_halt(): instead of executing hlt, ask the
+ * hypervisor to shut the domain down with reason SHUTDOWN_poweroff.
+ */
+void
+cpu_halt(void)
+{
+ HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
+
+/* Set to 1 the first time (and every time) the Xen idle method runs. */
+int scheduler_running;
+
+/*
+ * Xen (i386) variant of the "hlt" idle method: mark the scheduler as
+ * running, enable interrupts and block via idle_block().  The sbt
+ * argument is not used, and unlike the native variant the per-CPU
+ * monitorbuf state word is not touched.
+ */
+static void
+cpu_idle_hlt(sbintime_t sbt)
+{
+
+ scheduler_running = 1;
+ enable_intr();
+ idle_block();
+}
+
+#else
+/*
+ * Shutdown the CPU as much as possible
+ *
+ * Native (non-Xen) variant: executes halt() in an endless loop, so
+ * this function never returns; the loop re-halts after any wakeup.
+ */
+void
+cpu_halt(void)
+{
+ for (;;)
+ halt();
+}
+
+#endif
+
+/*
+ * cpu_idle_hook is called by cpu_idle_acpi() when it is non-NULL; the
+ * "acpi" idle method is only offered through the sysctls while a hook
+ * is installed.
+ */
+void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+/*
+ * Per-CPU idle state, kept in the first int of the pcpu monitorbuf.
+ * Each idle method stores its state there before idling and stores
+ * STATE_RUNNING again before returning; cpu_idle_wakeup() inspects the
+ * word of the target CPU.
+ */
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
+#ifndef PC98
+/*
+ * "acpi" idle method.  Marks the CPU as STATE_SLEEPING, then, with
+ * interrupts disabled, re-checks for runnable work.  If there is none
+ * the CPU is handed to cpu_idle_hook(sbt) when a hook is installed,
+ * and put to sleep with "sti; hlt" otherwise.  The state word is reset
+ * to STATE_RUNNING before returning.
+ *
+ * NOTE(review): the hook is entered with interrupts disabled and is
+ * presumably responsible for re-enabling them -- confirm against the
+ * hook implementation.
+ */
+static void
+cpu_idle_acpi(sbintime_t sbt)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
+ /* See comments in cpu_idle_hlt(). */
+ disable_intr();
+ if (sched_runnable())
+ enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook(sbt);
+ else
+ __asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
+}
+#endif /* !PC98 */
+
+#if !defined(__i386__) || !defined(XEN)
+/*
+ * "hlt" idle method (native, non-Xen).  Marks the CPU as
+ * STATE_SLEEPING, halts it unless a thread became runnable, and resets
+ * the state word to STATE_RUNNING on the way out.  The sbt argument is
+ * not used.
+ */
+static void
+cpu_idle_hlt(sbintime_t sbt)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
+ /*
+ * Since we may be in a critical section from cpu_idle(), if
+ * an interrupt fires during that critical section we may have
+ * a pending preemption. If the CPU halts, then that thread
+ * may not execute until a later interrupt awakens the CPU.
+ * To handle this race, check for a runnable thread after
+ * disabling interrupts and immediately return if one is
+ * found. Also, we must absolutely guarantee that hlt is
+ * the next instruction after sti. This ensures that any
+ * interrupt that fires after the call to disable_intr() will
+ * immediately awaken the CPU from hlt. Finally, please note
+ * that on x86 this works fine because of interrupts enabled only
+ * after the instruction following sti takes place, while IF is set
+ * to 1 immediately, allowing hlt instruction to acknowledge the
+ * interrupt.
+ */
+ disable_intr();
+ if (sched_runnable())
+ enable_intr();
+ else
+ __asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
+}
+#endif
+
+/*
+ * "mwait" idle method.  Marks the CPU as STATE_MWAIT and waits with
+ * MONITOR/MWAIT on the per-CPU state word itself, so that a store to
+ * that word (see cpu_idle_wakeup()) is what the monitor is armed on.
+ * The state word is STATE_RUNNING again on return.  The sbt argument
+ * is not used.
+ */
+static void
+cpu_idle_mwait(sbintime_t sbt)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+
+ /* See comments in cpu_idle_hlt(). */
+ disable_intr();
+ if (sched_runnable()) {
+ enable_intr();
+ *state = STATE_RUNNING;
+ return;
+ }
+ /* Arm the monitor on the state word before the final re-check. */
+ cpu_monitor(state, 0, 0);
+ /*
+ * cpu_idle_wakeup() may already have changed the word since it was
+ * set above; only enter mwait if it still reads STATE_MWAIT.
+ */
+ if (*state == STATE_MWAIT)
+ __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
+ else
+ enable_intr();
+ *state = STATE_RUNNING;
+}
+
+/*
+ * "spin" idle method: stay in STATE_RUNNING and busy-wait for work.
+ * Polls sched_runnable() at most 1000 times, executing cpu_spinwait()
+ * after every unsuccessful poll, and returns as soon as a poll
+ * succeeds or the budget is used up.  The sbt argument is not used.
+ */
+static void
+cpu_idle_spin(sbintime_t sbt)
+{
+	int *idle_state;
+	int polls_left;
+
+	idle_state = (int *)PCPU_PTR(monitorbuf);
+	*idle_state = STATE_RUNNING;
+
+	/*
+	 * The sched_runnable() poll is racy, but because we poll in a
+	 * loop, missing a wakeup once costs very little (and is far
+	 * better than not checking at all).
+	 */
+	polls_left = 1000;
+	while (polls_left-- > 0 && !sched_runnable())
+		cpu_spinwait();
+}
+
+/*
+ * C1E renders the local APIC timer dead, so we disable it by
+ * reading the Interrupt Pending Message register and clearing
+ * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
+ *
+ * The clearing itself is done in cpu_idle(), and only when both
+ * cpu_ident_amdc1e and cpu_disable_c3_sleep are set.
+ *
+ * Reference:
+ * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
+ * #32559 revision 3.00+
+ */
+#define MSR_AMDK8_IPM 0xc0010055
+#define AMDK8_SMIONCMPHALT (1ULL << 27)
+#define AMDK8_C1EONCMPHALT (1ULL << 28)
+/* Both halt-related bits; this is the mask cleared in the MSR. */
+#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
+
+/*
+ * Detect C1E capability (mostly the latest dual-core, or future, K8
+ * family parts) and record it in cpu_ident_amdc1e.
+ *
+ * The flag is set when the vendor is AMD, bits 8-11 of cpu_id are all
+ * ones, and the cpu_id field selected by mask 0x0fff0000 is at least
+ * 0x00040000.  It is only ever set here, never cleared.
+ */
+void
+cpu_probe_amdc1e(void)
+{
+
+	if (cpu_vendor_id != CPU_VENDOR_AMD)
+		return;
+	if ((cpu_id & 0x00000f00) != 0x00000f00)
+		return;
+	if ((cpu_id & 0x0fff0000) < 0x00040000)
+		return;
+
+	cpu_ident_amdc1e = 1;
+}
+
+/*
+ * Main idle method invoked by cpu_idle(); it can be replaced at run
+ * time through the machdep.idle sysctl.  The default is cpu_idle_hlt
+ * on i386 PC98 and XEN kernels and cpu_idle_acpi everywhere else.
+ */
+#if defined(__i386__) && (defined(PC98) || defined(XEN))
+void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt;
+#else
+void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
+#endif
+
+/*
+ * Idle the current CPU.
+ *
+ * busy: non-zero requests a short idle period.  In that case, on
+ *       non-Xen kernels, MONITOR/MWAIT is used directly when the CPU
+ *       supports it and idle_mwait is enabled, bypassing the timer
+ *       switch and the selected idle method.  Otherwise cpu_idle_fn is
+ *       called; when busy is zero the call is wrapped in a critical
+ *       section with the clocks switched to idle mode around it.
+ */
+void
+cpu_idle(int busy)
+{
+#if !defined(__i386__) || !defined(XEN)
+ uint64_t msr;
+#endif
+ /* Passed to the idle method unchanged unless cpu_idleclock() runs. */
+ sbintime_t sbt = -1;
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
+#if defined(MP_WATCHDOG) && (!defined(__i386__) || !defined(XEN))
+ ap_watchdog(PCPU_GET(cpuid));
+#endif
+#if !defined(__i386__) || !defined(XEN)
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ /*
+ * cpu_idle_mwait() does not use its argument, so
+ * passing busy in place of an sbintime_t is harmless.
+ */
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
+#endif
+
+ /* If we have time - switch timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ sbt = cpu_idleclock();
+ }
+
+#if !defined(__i386__) || !defined(XEN)
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ /* Only write the MSR back when one of the bits is set. */
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+ }
+#endif
+
+ /* Call main idle method. */
+ cpu_idle_fn(sbt);
+
+ /* Switch timers back into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+#if !defined(__i386__) || !defined(XEN)
+out:
+#endif
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
+}
+
+/*
+ * Inspect the idle state word of the given CPU.
+ *
+ * Returns 0 when that CPU is in STATE_SLEEPING.  Otherwise returns 1,
+ * after first resetting the word to STATE_RUNNING if it read
+ * STATE_MWAIT.
+ *
+ * NOTE(review): presumably a return of 1 tells the caller that no IPI
+ * is needed, because the store to the monitored word is what ends the
+ * target's mwait -- confirm against the callers.
+ */
+int
+cpu_idle_wakeup(int cpu)
+{
+	int *target_state;
+
+	target_state = (int *)pcpu_find(cpu)->pc_monitorbuf;
+
+	/*
+	 * No atomics are needed here: losing the race merely results
+	 * in unnecessary IPIs.
+	 */
+	if (*target_state == STATE_SLEEPING)
+		return (0);
+	if (*target_state == STATE_MWAIT)
+		*target_state = STATE_RUNNING;
+	return (1);
+}
+
+/*
+ * Ordered by speed/power consumption.
+ *
+ * Table of selectable idle methods, terminated by a { NULL, NULL }
+ * entry.  id_fn is the method (compared against and assigned to
+ * cpu_idle_fn by the sysctl handlers); id_name is the name exposed
+ * through the machdep.idle and machdep.idle_available sysctls.
+ */
+struct {
+ void *id_fn;
+ char *id_name;
+} idle_tbl[] = {
+ { cpu_idle_spin, "spin" },
+ { cpu_idle_mwait, "mwait" },
+ { cpu_idle_hlt, "hlt" },
+#if !defined(__i386__) || !defined(PC98)
+ { cpu_idle_acpi, "acpi" },
+#endif
+ { NULL, NULL }
+};
+
+/*
+ * Sysctl handler for machdep.idle_available (read-only).
+ *
+ * Builds a ", "-separated list of the idle_tbl names usable on this
+ * system and returns it as a string.  Names containing "mwait" are
+ * left out when the CPU lacks CPUID2_MON, and "acpi" is left out
+ * while no cpu_idle_hook is installed.  Returns the result of
+ * sysctl_handle_string().
+ */
+static int
+idle_sysctl_available(SYSCTL_HANDLER_ARGS)
+{
+	const char *name;
+	char *list, *tail;
+	int error, idx;
+
+	list = malloc(256, M_TEMP, M_WAITOK);
+	tail = list;
+	for (idx = 0; (name = idle_tbl[idx].id_name) != NULL; idx++) {
+		if (strstr(name, "mwait") != NULL &&
+		    (cpu_feature2 & CPUID2_MON) == 0)
+			continue;
+#if !defined(__i386__) || !defined(PC98)
+		if (strcmp(name, "acpi") == 0 && cpu_idle_hook == NULL)
+			continue;
+#endif
+		/* Every entry but the first is preceded by a separator. */
+		tail += sprintf(tail, "%s%s", tail != list ? ", " : "",
+		    name);
+	}
+	error = sysctl_handle_string(oidp, list, 0, req);
+	free(list, M_TEMP);
+	return (error);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+    0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
+/*
+ * Sysctl handler for machdep.idle (read/write).
+ *
+ * On read, reports the idle_tbl name of the method cpu_idle_fn points
+ * at, or "unknown" if it matches no table entry.  On write, selects
+ * the table entry whose name equals the supplied string, provided it
+ * is usable on this system ("mwait" names need CPUID2_MON, "acpi"
+ * needs an installed cpu_idle_hook).
+ *
+ * Returns 0 on success, the error from sysctl_handle_string() if that
+ * fails, and EINVAL if the requested name is unknown or unavailable.
+ */
+static int
+idle_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	char buf[16];
+	int error;
+	char *p;
+	int i;
+
+	p = "unknown";
+	for (i = 0; idle_tbl[i].id_name != NULL; i++) {
+		if (idle_tbl[i].id_fn == cpu_idle_fn) {
+			p = idle_tbl[i].id_name;
+			break;
+		}
+	}
+	/*
+	 * strncpy() leaves the buffer unterminated when the source fills
+	 * it completely.  Zero the buffer and use strlcpy() so that buf
+	 * is always a fully initialized, NUL-terminated string no matter
+	 * how long a table name is.
+	 */
+	memset(buf, 0, sizeof(buf));
+	strlcpy(buf, p, sizeof(buf));
+	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+	/* Done unless the request carries a new value. */
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	for (i = 0; idle_tbl[i].id_name != NULL; i++) {
+		if (strstr(idle_tbl[i].id_name, "mwait") &&
+		    (cpu_feature2 & CPUID2_MON) == 0)
+			continue;
+#if !defined(__i386__) || !defined(PC98)
+		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+		    cpu_idle_hook == NULL)
+			continue;
+#endif
+		if (strcmp(idle_tbl[i].id_name, buf))
+			continue;
+		cpu_idle_fn = idle_tbl[i].id_fn;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
+    idle_sysctl, "A", "currently selected idle function");
More information about the freebsd-amd64
mailing list