svn commit: r238658 - in stable/8/sys: amd64/amd64 i386/i386 pc98/pc98
Alexander Motin (mav at FreeBSD.org)
Fri Jul 20 19:35:20 UTC 2012
Author: mav
Date: Fri Jul 20 19:35:20 2012
New Revision: 238658
URL: http://svn.freebsd.org/changeset/base/238658
Log:
Partially MFC r212541:
Refactor cpu_idle() on x86.
Use MONITOR/MWAIT instructions (if supported) under a high sleep/wakeup rate,
as a fast alternative to the other methods. It allows the SMP scheduler to
wake up sleeping CPUs much faster without using an IPI, significantly
increasing performance on some highly task-switching loads. On such loads it
also hides the two ACPI timer reads otherwise done by acpi_cpu_idle(), which
are reported to be slow on some systems.
MFC r225936 (by attilio):
Add some improvements to the idle table callbacks:
- Replace instances of the inline "hlt" assembly instruction with calls to
the halt() function.
- In cpu_idle_mwait() avoid races in the sched_runnable() check by using
the same pattern used in cpu_idle_hlt() with the 'hlt' instruction.
- Add comments explaining the logic behind the pattern used in
cpu_idle_hlt() and other idle callbacks.
PR: kern/170021
Modified:
stable/8/sys/amd64/amd64/machdep.c
stable/8/sys/i386/i386/machdep.c
stable/8/sys/pc98/pc98/machdep.c
Directory Properties:
stable/8/sys/ (props changed)
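
For readers following the diff below: the heart of the merge is a per-CPU
"state" word (kept in the pcpu monitorbuf) that the idle CPU arms with
MONITOR and then sleeps on with MWAIT. A remote CPU can wake it simply by
storing into that word, so the scheduler does not need to send an IPI. A
condensed sketch of the pattern, assuming the usual x86 machdep helpers
(disable_intr(), enable_intr(), sched_runnable(), cpu_monitor()) and using a
hypothetical function name rather than the exact committed code:

#define STATE_RUNNING	0x0
#define STATE_MWAIT	0x1
#define MWAIT_C1	0x00	/* mwait hint for the C1 power state */

static void
idle_mwait_sketch(int *state)		/* state = this CPU's monitorbuf word */
{
	*state = STATE_MWAIT;
	/* Close the race against an interrupt queueing a preemption. */
	disable_intr();
	if (sched_runnable()) {
		/* Work is already pending: do not sleep at all. */
		enable_intr();
		*state = STATE_RUNNING;
		return;
	}
	/* Arm the monitor on the state word ... */
	cpu_monitor(state, 0, 0);
	/* ... and sleep only if nobody has written to it since. */
	if (*state == STATE_MWAIT)
		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
	else
		enable_intr();
	*state = STATE_RUNNING;
}

Whether this fast path is preferred for busy idle entries is controlled by
the new machdep.idle_mwait tunable/sysctl (default 1), while the main idle
method remains selectable through machdep.idle and is listed by
machdep.idle_available.
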
Modified: stable/8/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/8/sys/amd64/amd64/machdep.c Fri Jul 20 17:51:20 2012 (r238657)
+++ stable/8/sys/amd64/amd64/machdep.c Fri Jul 20 19:35:20 2012 (r238658)
@@ -629,63 +629,122 @@ void
cpu_halt(void)
{
for (;;)
- __asm__ ("hlt");
+ halt();
}
void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
+static void
+cpu_idle_acpi(int busy)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
+ /* See comments in cpu_idle_hlt(). */
+ disable_intr();
+ if (sched_runnable())
+ enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook();
+ else
+ __asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
+}
static void
cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
/*
- * we must absolutely guarentee that hlt is the next instruction
- * after sti or we introduce a timing window.
+ * Since we may be in a critical section from cpu_idle(), if
+ * an interrupt fires during that critical section we may have
+ * a pending preemption. If the CPU halts, then that thread
+ * may not execute until a later interrupt awakens the CPU.
+ * To handle this race, check for a runnable thread after
+ * disabling interrupts and immediately return if one is
+ * found. Also, we must absolutely guarantee that hlt is
+ * the next instruction after sti. This ensures that any
+ * interrupt that fires after the call to disable_intr() will
+ * immediately awaken the CPU from hlt. Finally, please note
+ * that on x86 this works fine because of interrupts enabled only
+ * after the instruction following sti takes place, while IF is set
+ * to 1 immediately, allowing hlt instruction to acknowledge the
+ * interrupt.
*/
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
+
static void
-cpu_idle_acpi(int busy)
+cpu_idle_mwait(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+
+ /* See comments in cpu_idle_hlt(). */
disable_intr();
- if (sched_runnable())
+ if (sched_runnable()) {
enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook();
+ *state = STATE_RUNNING;
+ return;
+ }
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
else
- __asm __volatile("sti; hlt");
+ enable_intr();
+ *state = STATE_RUNNING;
}
-static int cpu_ident_amdc1e = 0;
-
-static int
-cpu_probe_amdc1e(void)
+static void
+cpu_idle_spin(int busy)
{
+ int *state;
int i;
- /*
- * Forget it, if we're not using local APIC timer.
- */
- if (resource_disabled("apic", 0) ||
- (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
- return (0);
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
/*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- return (1);
+ * The sched_runnable() call is racy but as long as there is
+ * a loop missing it one time will have just a little impact if any
+ * (and it is much better than missing the check at all).
+ */
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
-
- return (0);
}
/*
@@ -703,110 +762,66 @@ cpu_probe_amdc1e(void)
#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
{
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else {
- uint64_t msr;
-
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
- if (cpu_idle_hook)
- cpu_idle_hook();
- else
- __asm __volatile("sti; hlt");
+ /*
+ * Detect the presence of C1E capability mostly on latest
+ * dual-cores (or future) k8 family.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ (cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ cpu_ident_amdc1e = 1;
}
}
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
void (*cpu_idle_fn)(int) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
+ uint64_t msr;
+
#ifdef SMP
if (mp_grab_cpu_hlt())
return;
#endif
- cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states. Lower 4 bits are sub-states.
- */
-#define MWAIT_C0 0xf0
-#define MWAIT_C1 0x00
-#define MWAIT_C2 0x10
-#define MWAIT_C3 0x20
-#define MWAIT_C4 0x30
-
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
-
-static void
-cpu_idle_mwait(int busy)
-{
- int *mwait;
-
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
-}
-
-static void
-cpu_idle_mwait_hlt(int busy)
-{
- int *mwait;
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ return;
+ }
+ }
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -819,8 +834,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
- { cpu_idle_amdc1e, "amdc1e" },
{ cpu_idle_hlt, "hlt" },
{ cpu_idle_acpi, "acpi" },
{ NULL, NULL }
@@ -839,8 +852,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
p += sprintf(p, "%s, ", idle_tbl[i].id_name);
}
@@ -849,6 +862,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -872,8 +888,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
if (strcmp(idle_tbl[i].id_name, buf))
continue;
@@ -883,9 +899,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
@@ -1819,8 +1832,7 @@ hammer_time(u_int64_t modulep, u_int64_t
}
#endif
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
/* Location of kernel stack for locore */
return ((u_int64_t)thread0.td_pcb);
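
The wakeup half of the protocol is what actually saves the IPI. The
cpu_idle_wakeup() change above boils down to the following check on the
target CPU's state word (sketch only, with a hypothetical name; STATE_SLEEPING
is 0x2 in the patch, STATE_RUNNING and STATE_MWAIT as in the sketch near the
top). The return value tells the scheduler whether an IPI is still required:

/* Returns 1 if the target CPU will notice on its own, 0 if an IPI is needed. */
static int
idle_wakeup_sketch(int *state)		/* target CPU's monitorbuf word */
{
	if (*state == STATE_SLEEPING)	/* hlt/ACPI idle: only an interrupt helps */
		return (0);
	if (*state == STATE_MWAIT)	/* monitor armed: this store breaks the mwait */
		*state = STATE_RUNNING;
	return (1);			/* running or spinning CPUs need nothing extra */
}
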
Modified: stable/8/sys/i386/i386/machdep.c
==============================================================================
--- stable/8/sys/i386/i386/machdep.c Fri Jul 20 17:51:20 2012 (r238657)
+++ stable/8/sys/i386/i386/machdep.c Fri Jul 20 19:35:20 2012 (r238658)
@@ -1177,9 +1177,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *
return (0);
}
-
-void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
-
#ifdef XEN
void
@@ -1207,66 +1204,127 @@ void
cpu_halt(void)
{
for (;;)
- __asm__ ("hlt");
+ halt();
}
+#endif
+
+void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
+static void
+cpu_idle_acpi(int busy)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
+ /* See comments in cpu_idle_hlt(). */
+ disable_intr();
+ if (sched_runnable())
+ enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook();
+ else
+ __asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
+}
+
+#ifndef XEN
static void
cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
/*
- * we must absolutely guarentee that hlt is the next instruction
- * after sti or we introduce a timing window.
+ * Since we may be in a critical section from cpu_idle(), if
+ * an interrupt fires during that critical section we may have
+ * a pending preemption. If the CPU halts, then that thread
+ * may not execute until a later interrupt awakens the CPU.
+ * To handle this race, check for a runnable thread after
+ * disabling interrupts and immediately return if one is
+ * found. Also, we must absolutely guarantee that hlt is
+ * the next instruction after sti. This ensures that any
+ * interrupt that fires after the call to disable_intr() will
+ * immediately awaken the CPU from hlt. Finally, please note
+ * that on x86 this works fine because of interrupts enabled only
+ * after the instruction following sti takes place, while IF is set
+ * to 1 immediately, allowing hlt instruction to acknowledge the
+ * interrupt.
*/
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
#endif
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
+
static void
-cpu_idle_acpi(int busy)
+cpu_idle_mwait(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+
+ /* See comments in cpu_idle_hlt(). */
disable_intr();
- if (sched_runnable())
+ if (sched_runnable()) {
enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook();
+ *state = STATE_RUNNING;
+ return;
+ }
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
else
- __asm __volatile("sti; hlt");
+ enable_intr();
+ *state = STATE_RUNNING;
}
-static int cpu_ident_amdc1e = 0;
-
-#if !defined(XEN) || defined(XEN_PRIVILEGED)
-static int
-cpu_probe_amdc1e(void)
-{
-#ifdef DEV_APIC
+static void
+cpu_idle_spin(int busy)
+{
+ int *state;
int i;
- /*
- * Forget it, if we're not using local APIC timer.
- */
- if (resource_disabled("apic", 0) ||
- (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
- return (0);
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
/*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- return (1);
+ * The sched_runnable() call is racy but as long as there is
+ * a loop missing it one time will have just a little impact if any
+ * (and it is much better than missing the check at all).
+ */
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
-#endif
- return (0);
}
-#endif
/*
* C1E renders the local APIC timer dead, so we disable it by
@@ -1283,32 +1341,20 @@ cpu_probe_amdc1e(void)
#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
{
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else {
- uint64_t msr;
-
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
- if (cpu_idle_hook)
- cpu_idle_hook();
- else
- __asm __volatile("sti; hlt");
+ /*
+ * Detect the presence of C1E capability mostly on latest
+ * dual-cores (or future) k8 family.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ (cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ cpu_ident_amdc1e = 1;
}
}
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
#ifdef XEN
void (*cpu_idle_fn)(int) = cpu_idle_hlt;
#else
@@ -1318,79 +1364,51 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi
void
cpu_idle(int busy)
{
+#ifndef XEN
+ uint64_t msr;
+#endif
+
#if defined(SMP) && !defined(XEN)
if (mp_grab_cpu_hlt())
return;
#endif
- cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states. Lower 4 bits are sub-states.
- */
-#define MWAIT_C0 0xf0
-#define MWAIT_C1 0x00
-#define MWAIT_C2 0x10
-#define MWAIT_C3 0x20
-#define MWAIT_C4 0x30
-
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
-
-static void
-cpu_idle_mwait(int busy)
-{
- int *mwait;
-
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
-}
-
-static void
-cpu_idle_mwait_hlt(int busy)
-{
- int *mwait;
+#ifndef XEN
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ return;
+ }
+ }
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+#endif
+
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -1403,8 +1421,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
- { cpu_idle_amdc1e, "amdc1e" },
{ cpu_idle_hlt, "hlt" },
{ cpu_idle_acpi, "acpi" },
{ NULL, NULL }
@@ -1423,8 +1439,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
p += sprintf(p, "%s, ", idle_tbl[i].id_name);
}
@@ -1433,6 +1449,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -1456,8 +1475,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
if (strcmp(idle_tbl[i].id_name, buf))
continue;
@@ -1467,9 +1486,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
@@ -2723,8 +2739,7 @@ init386(first)
thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
#if defined(XEN_PRIVILEGED)
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
#endif
}
@@ -3001,8 +3016,7 @@ init386(first)
thread0.td_pcb->pcb_ext = 0;
thread0.td_frame = &proc0_tf;
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
}
#endif
Modified: stable/8/sys/pc98/pc98/machdep.c
==============================================================================
--- stable/8/sys/pc98/pc98/machdep.c Fri Jul 20 17:51:20 2012 (r238657)
+++ stable/8/sys/pc98/pc98/machdep.c Fri Jul 20 19:35:20 2012 (r238658)
@@ -1122,40 +1122,36 @@ cpu_halt(void)
__asm__ ("hlt");
}
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
static void
cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
/*
- * we must absolutely guarentee that hlt is the next instruction
+ * We must absolutely guarantee that hlt is the next instruction
* after sti or we introduce a timing window.
*/
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
else
__asm __volatile("sti; hlt");
-}
-
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
-void (*cpu_idle_fn)(int) = cpu_idle_hlt;
-
-void
-cpu_idle(int busy)
-{
-#if defined(SMP)
- if (mp_grab_cpu_hlt())
- return;
-#endif
- cpu_idle_fn(busy);
+ *state = STATE_RUNNING;
}
/*
- * mwait cpu power states. Lower 4 bits are sub-states.
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
*/
#define MWAIT_C0 0xf0
#define MWAIT_C1 0x00
@@ -1163,63 +1159,74 @@ cpu_idle(int busy)
#define MWAIT_C3 0x20
#define MWAIT_C4 0x30
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
-
static void
cpu_idle_mwait(int busy)
{
- int *mwait;
+ int *state;
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+ if (!sched_runnable()) {
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ cpu_mwait(0, MWAIT_C1);
+ }
+ *state = STATE_RUNNING;
}
static void
-cpu_idle_mwait_hlt(int busy)
+cpu_idle_spin(int busy)
+{
+ int *state;
+ int i;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
+ }
+}
+
+void (*cpu_idle_fn)(int) = cpu_idle_hlt;
+
+void
+cpu_idle(int busy)
{
- int *mwait;
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
+#ifdef SMP
+ if (mp_grab_cpu_hlt())
return;
+#endif
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ return;
+ }
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -1232,7 +1239,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
{ cpu_idle_hlt, "hlt" },
{ NULL, NULL }
};
@@ -1257,6 +1263,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -1288,9 +1297,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");