svn commit: r206462 - in stable/7/sys: amd64/amd64 amd64/include i386/i386 i386/include
Alan Cox
alc at FreeBSD.org
Sat Apr 10 22:24:04 UTC 2010
Author: alc
Date: Sat Apr 10 22:24:03 2010
New Revision: 206462
URL: http://svn.freebsd.org/changeset/base/206462
Log:
MFC r204907, r204913, r205402, r205573, r205778
Implement AMD's recommended workaround for Erratum 383 on Family 10h
processors.
Enable machine check exceptions by default.
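Two loader tunables govern the new behavior: hw.mca.enabled (now 1 by
default) switches machine check handling as a whole, and the new
hw.mca.amd10h_L1TP (default 1) chooses between the recommended pmap-level
workaround for Erratum 383 and the alternative of masking L1 TLB parity
(L1TP) error reporting.  Whether the recommended workaround is active is
exported read-only as hw.mca.erratum383.  A minimal userland sketch for
checking the result at run time -- not part of this commit, and assuming a
kernel that includes it:

/*
 * Hypothetical check of the sysctls added or changed by this merge.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int enabled, erratum383;
        size_t len;

        len = sizeof(enabled);
        if (sysctlbyname("hw.mca.enabled", &enabled, &len, NULL, 0) == -1)
                enabled = -1;           /* OID missing: kernel lacks MCA */
        len = sizeof(erratum383);
        if (sysctlbyname("hw.mca.erratum383", &erratum383, &len, NULL,
            0) == -1)
                erratum383 = -1;        /* OID missing: kernel predates MFC */
        printf("hw.mca.enabled=%d hw.mca.erratum383=%d\n", enabled,
            erratum383);
        return (0);
}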
Modified:
stable/7/sys/amd64/amd64/mca.c
stable/7/sys/amd64/amd64/pmap.c
stable/7/sys/amd64/include/md_var.h
stable/7/sys/amd64/include/specialreg.h
stable/7/sys/i386/i386/mca.c
stable/7/sys/i386/i386/pmap.c
stable/7/sys/i386/include/md_var.h
stable/7/sys/i386/include/specialreg.h
Directory Properties:
stable/7/sys/ (props changed)
stable/7/sys/cddl/contrib/opensolaris/ (props changed)
stable/7/sys/contrib/dev/acpica/ (props changed)
stable/7/sys/contrib/pf/ (props changed)
Modified: stable/7/sys/amd64/amd64/mca.c
==============================================================================
--- stable/7/sys/amd64/amd64/mca.c Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/amd64/amd64/mca.c Sat Apr 10 22:24:03 2010 (r206462)
@@ -60,11 +60,20 @@ static int mca_count; /* Number of reco
SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");
-static int mca_enabled = 0;
+static int mca_enabled = 1;
TUNABLE_INT("hw.mca.enabled", &mca_enabled);
SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
"Administrative toggle for machine check support");
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+ "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+ "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600; /* Check hourly by default. */
@@ -527,7 +536,7 @@ void
mca_init(void)
{
uint64_t mcg_cap;
- uint64_t ctl;
+ uint64_t ctl, mask;
int skip;
int i;
@@ -535,6 +544,15 @@ mca_init(void)
if (!mca_enabled || !(cpu_feature & CPUID_MCE))
return;
+ /*
+ * On AMD Family 10h processors, unless logging of level one TLB
+ * parity (L1TP) errors is disabled, enable the recommended workaround
+ * for Erratum 383.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+ workaround_erratum383 = 1;
+
if (cpu_feature & CPUID_MCA) {
if (PCPU_GET(cpuid) == 0)
mca_setup();
@@ -545,6 +563,19 @@ mca_init(void)
/* Enable MCA features. */
wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
+ /*
+ * Disable logging of level one TLB parity (L1TP) errors by
+ * the data cache as an alternative workaround for AMD Family
+ * 10h Erratum 383. Unlike the recommended workaround, there
+ * is no performance penalty to this workaround. However,
+ * L1TP errors will go unreported.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+ mask = rdmsr(MSR_MC0_CTL_MASK);
+ if ((mask & (1UL << 5)) == 0)
+ wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+ }
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
/* By default enable logging of all errors. */
ctl = 0xffffffffffffffffUL;
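For reference, the Family 10h handling that the mca.c hunks above add (the
i386/i386/mca.c change further down is identical) boils down to the
following.  This is a condensed and slightly reordered sketch, not the
literal merged code: the commit open-codes both pieces inside mca_init(),
and the helper name below is hypothetical and does not exist in the tree.

static void
mca_amd10h_erratum383_setup(void)
{
        uint64_t mask;

        if (cpu_vendor_id != CPU_VENDOR_AMD ||
            CPUID_TO_FAMILY(cpu_id) != 0x10)
                return;
        if (amd10h_L1TP) {
                /* Recommended workaround: pmap avoids the bad TLB state. */
                workaround_erratum383 = 1;
        } else {
                /*
                 * Alternative workaround: mask reporting of L1 TLB parity
                 * (L1TP) errors, bit 5 of MC0_CTL_MASK.  No performance
                 * cost, but genuine L1TP errors then go unreported.
                 */
                mask = rdmsr(MSR_MC0_CTL_MASK);
                if ((mask & (1UL << 5)) == 0)
                        wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
        }
}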
Modified: stable/7/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/7/sys/amd64/amd64/pmap.c Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/amd64/amd64/pmap.c Sat Apr 10 22:24:03 2010 (r206462)
@@ -7,7 +7,7 @@
* All rights reserved.
* Copyright (c) 2003 Peter Wemm
* All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <alc at cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <alc at cs.rice.edu>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -249,6 +249,9 @@ static void pmap_remove_entry(struct pma
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -658,13 +661,13 @@ pmap_init(void)
pv_entry_high_water = 9 * (pv_entry_max / 10);
/*
- * Disable large page mappings by default if the kernel is running in
- * a virtual machine on an AMD Family 10h processor. This is a work-
- * around for Erratum 383.
+ * If the kernel is running in a virtual machine on an AMD Family 10h
+ * processor, then it must assume that MCA is enabled by the virtual
+ * machine monitor.
*/
if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
CPUID_TO_FAMILY(cpu_id) == 0x10)
- pg_ps_enabled = 0;
+ workaround_erratum383 = 1;
/*
* Are large page mappings enabled?
@@ -809,6 +812,45 @@ pmap_cache_bits(int mode, boolean_t is_p
cache_bits |= PG_NC_PWT;
return (cache_bits);
}
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB. Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+ u_long cr4;
+
+ if ((newpde & PG_PS) == 0)
+ /* Demotion: flush a specific 2MB page mapping. */
+ invlpg(va);
+ else if ((newpde & PG_G) == 0)
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB
+ * because there are too many to flush individually.
+ */
+ invltlb();
+ else {
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB,
+ * including any global (PG_G) mappings.
+ */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+ /*
+ * Although preemption at this point could be detrimental to
+ * performance, it would not lead to an error. PG_G is simply
+ * ignored if CR4.PGE is clear. Moreover, in case this block
+ * is re-entered, the load_cr4() either above or below will
+ * modify CR4.PGE flushing the TLB.
+ */
+ load_cr4(cr4 | CR4_PGE);
+ }
+}
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
@@ -905,6 +947,69 @@ pmap_invalidate_cache(void)
smp_cache_flush();
sched_unpin();
}
+
+struct pde_action {
+ cpumask_t store; /* processor that updates the PDE */
+ cpumask_t invalidate; /* processors that invalidate their TLB */
+ vm_offset_t va;
+ pd_entry_t *pde;
+ pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_action(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if (act->store == PCPU_GET(cpumask))
+ pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+ pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes. This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors. It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+ struct pde_action act;
+ cpumask_t active, cpumask;
+
+ sched_pin();
+ cpumask = PCPU_GET(cpumask);
+ if (pmap == kernel_pmap)
+ active = all_cpus;
+ else
+ active = pmap->pm_active;
+ if ((active & PCPU_GET(other_cpus)) != 0) {
+ act.store = cpumask;
+ act.invalidate = active;
+ act.va = va;
+ act.pde = pde;
+ act.newpde = newpde;
+ smp_rendezvous_cpus(cpumask | active,
+ smp_no_rendevous_barrier, pmap_update_pde_action,
+ pmap_update_pde_teardown, &act);
+ } else {
+ pde_store(pde, newpde);
+ if ((active & cpumask) != 0)
+ pmap_update_pde_invalidate(va, newpde);
+ }
+ sched_unpin();
+}
#else /* !SMP */
/*
* Normal, non-SMP, invalidation functions.
@@ -942,6 +1047,15 @@ pmap_invalidate_cache(void)
wbinvd();
}
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+ pde_store(pde, newpde);
+ if (pmap == kernel_pmap || pmap->pm_active)
+ pmap_update_pde_invalidate(va, newpde);
+}
#endif /* !SMP */
static void
@@ -2324,7 +2438,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
* processor changing the setting of PG_A and/or PG_M between
* the read above and the store below.
*/
- pde_store(pde, newpde);
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, newpde);
+ else
+ pde_store(pde, newpde);
/*
* Invalidate a stale recursive mapping of the page table page.
@@ -2940,7 +3057,10 @@ setpte:
/*
* Map the superpage.
*/
- pde_store(pde, PG_PS | newpde);
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+ else
+ pde_store(pde, PG_PS | newpde);
pmap_pde_promotions++;
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
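The heart of the recommended workaround is pmap_update_pde(): a promotion
or demotion must never leave any processor's TLB holding both a 2MB and a
4KB entry for the same virtual address, so the PDE is rewritten exactly
once, inside an IPI rendezvous, and every processor that may have cached
the old translation flushes it before the rendezvous completes.  A
condensed restatement of the amd64 SMP path above, with the roles of the
two callbacks spelled out (a sketch, not the literal merged code; the i386
version below additionally walks allpmaps for kernel mappings):

static void
pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
    pd_entry_t newpde)
{
        struct pde_action act;
        cpumask_t active, cpumask;

        sched_pin();                            /* stay on this CPU */
        cpumask = PCPU_GET(cpumask);
        active = (pmap == kernel_pmap) ? all_cpus : pmap->pm_active;
        if ((active & PCPU_GET(other_cpus)) != 0) {
                act.store = cpumask;            /* only the initiator stores */
                act.invalidate = active;        /* active CPUs invalidate */
                act.va = va;
                act.pde = pde;
                act.newpde = newpde;
                smp_rendezvous_cpus(cpumask | active,
                    smp_no_rendevous_barrier,   /* no setup step needed */
                    pmap_update_pde_action,     /* store, initiator only */
                    pmap_update_pde_teardown,   /* invalidate, active CPUs */
                    &act);
        } else {
                /* No other CPU can hold the mapping; update locally. */
                pde_store(pde, newpde);
                if ((active & cpumask) != 0)
                        pmap_update_pde_invalidate(va, newpde);
        }
        sched_unpin();
}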
Modified: stable/7/sys/amd64/include/md_var.h
==============================================================================
--- stable/7/sys/amd64/include/md_var.h Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/amd64/include/md_var.h Sat Apr 10 22:24:03 2010 (r206462)
@@ -61,6 +61,7 @@ extern char sigcode[];
extern int szsigcode;
extern uint64_t *vm_page_dump;
extern int vm_page_dump_size;
+extern int workaround_erratum383;
typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
struct thread;
Modified: stable/7/sys/amd64/include/specialreg.h
==============================================================================
--- stable/7/sys/amd64/include/specialreg.h Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/amd64/include/specialreg.h Sat Apr 10 22:24:03 2010 (r206462)
@@ -499,6 +499,7 @@
#define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */
#define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */
#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */
+#define MSR_MC0_CTL_MASK 0xc0010044
/* VIA ACE crypto featureset: for via_feature_rng */
#define VIA_HAS_RNG 1 /* cpu has RNG */
Modified: stable/7/sys/i386/i386/mca.c
==============================================================================
--- stable/7/sys/i386/i386/mca.c Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/i386/i386/mca.c Sat Apr 10 22:24:03 2010 (r206462)
@@ -60,11 +60,20 @@ static int mca_count; /* Number of reco
SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");
-static int mca_enabled = 0;
+static int mca_enabled = 1;
TUNABLE_INT("hw.mca.enabled", &mca_enabled);
SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
"Administrative toggle for machine check support");
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+ "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+ "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600; /* Check hourly by default. */
@@ -527,7 +536,7 @@ void
mca_init(void)
{
uint64_t mcg_cap;
- uint64_t ctl;
+ uint64_t ctl, mask;
int skip;
int i;
@@ -535,6 +544,15 @@ mca_init(void)
if (!mca_enabled || !(cpu_feature & CPUID_MCE))
return;
+ /*
+ * On AMD Family 10h processors, unless logging of level one TLB
+ * parity (L1TP) errors is disabled, enable the recommended workaround
+ * for Erratum 383.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+ workaround_erratum383 = 1;
+
if (cpu_feature & CPUID_MCA) {
if (PCPU_GET(cpuid) == 0)
mca_setup();
@@ -545,6 +563,19 @@ mca_init(void)
/* Enable MCA features. */
wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
+ /*
+ * Disable logging of level one TLB parity (L1TP) errors by
+ * the data cache as an alternative workaround for AMD Family
+ * 10h Erratum 383. Unlike the recommended workaround, there
+ * is no performance penalty to this workaround. However,
+ * L1TP errors will go unreported.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+ mask = rdmsr(MSR_MC0_CTL_MASK);
+ if ((mask & (1UL << 5)) == 0)
+ wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+ }
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
/* By default enable logging of all errors. */
ctl = 0xffffffffffffffffUL;
Modified: stable/7/sys/i386/i386/pmap.c
==============================================================================
--- stable/7/sys/i386/i386/pmap.c Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/i386/i386/pmap.c Sat Apr 10 22:24:03 2010 (r206462)
@@ -5,7 +5,7 @@
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <alc at cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <alc at cs.rice.edu>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -290,6 +290,7 @@ static void pmap_insert_pt_page(pmap_t p
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
@@ -308,6 +309,9 @@ static void pmap_remove_entry(struct pma
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -392,6 +396,13 @@ pmap_bootstrap(vm_paddr_t firstaddr)
kernel_pmap->pm_active = -1; /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
+
+ /*
+ * Request a spin mutex so that changes to allpmaps cannot be
+ * preempted by smp_rendezvous_cpus(). Otherwise,
+ * pmap_update_pde_kernel() could access allpmaps while it is
+ * being changed.
+ */
mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
@@ -672,19 +683,21 @@ pmap_init(void)
pv_entry_high_water = 9 * (pv_entry_max / 10);
/*
- * Disable large page mappings by default if the kernel is running in
- * a virtual machine on an AMD Family 10h processor. This is a work-
- * around for Erratum 383.
+ * If the kernel is running in a virtual machine on an AMD Family 10h
+ * processor, then it must assume that MCA is enabled by the virtual
+ * machine monitor.
*/
if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
CPUID_TO_FAMILY(cpu_id) == 0x10)
- pg_ps_enabled = 0;
+ workaround_erratum383 = 1;
/*
- * Are large page mappings enabled?
+ * Are large page mappings supported and enabled?
*/
TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
- if (pg_ps_enabled) {
+ if (pseflag == 0)
+ pg_ps_enabled = 0;
+ else if (pg_ps_enabled) {
KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
("pmap_init: can't assign to pagesizes[1]"));
pagesizes[1] = NBPDR;
@@ -826,6 +839,69 @@ pmap_cache_bits(int mode, boolean_t is_p
cache_bits |= PG_NC_PWT;
return (cache_bits);
}
+
+/*
+ * The caller is responsible for maintaining TLB consistency.
+ */
+static void
+pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
+{
+ pd_entry_t *pde;
+ pmap_t pmap;
+ boolean_t PTD_updated;
+
+ PTD_updated = FALSE;
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_FOREACH(pmap, &allpmaps, pm_list) {
+ if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
+ PG_FRAME))
+ PTD_updated = TRUE;
+ pde = pmap_pde(pmap, va);
+ pde_store(pde, newpde);
+ }
+ mtx_unlock_spin(&allpmaps_lock);
+ KASSERT(PTD_updated,
+ ("pmap_kenter_pde: current page table is not in allpmaps"));
+}
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB. Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+ u_long cr4;
+
+ if ((newpde & PG_PS) == 0)
+ /* Demotion: flush a specific 2MB page mapping. */
+ invlpg(va);
+ else if ((newpde & PG_G) == 0)
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB
+ * because there are too many to flush individually.
+ */
+ invltlb();
+ else {
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB,
+ * including any global (PG_G) mappings.
+ */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+ /*
+ * Although preemption at this point could be detrimental to
+ * performance, it would not lead to an error. PG_G is simply
+ * ignored if CR4.PGE is clear. Moreover, in case this block
+ * is re-entered, the load_cr4() either above or below will
+ * modify CR4.PGE flushing the TLB.
+ */
+ load_cr4(cr4 | CR4_PGE);
+ }
+}
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
@@ -922,6 +998,92 @@ pmap_invalidate_cache(void)
smp_cache_flush();
sched_unpin();
}
+
+struct pde_action {
+ cpumask_t store; /* processor that updates the PDE */
+ cpumask_t invalidate; /* processors that invalidate their TLB */
+ vm_offset_t va;
+ pd_entry_t *pde;
+ pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_kernel(void *arg)
+{
+ struct pde_action *act = arg;
+ pd_entry_t *pde;
+ pmap_t pmap;
+
+ if (act->store == PCPU_GET(cpumask))
+ /*
+ * Elsewhere, this operation requires allpmaps_lock for
+ * synchronization. Here, it does not because it is being
+ * performed in the context of an all_cpus rendezvous.
+ */
+ LIST_FOREACH(pmap, &allpmaps, pm_list) {
+ pde = pmap_pde(pmap, act->va);
+ pde_store(pde, act->newpde);
+ }
+}
+
+static void
+pmap_update_pde_user(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if (act->store == PCPU_GET(cpumask))
+ pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+ pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes. This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors. It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+ struct pde_action act;
+ cpumask_t active, cpumask;
+
+ sched_pin();
+ cpumask = PCPU_GET(cpumask);
+ if (pmap == kernel_pmap)
+ active = all_cpus;
+ else
+ active = pmap->pm_active;
+ if ((active & PCPU_GET(other_cpus)) != 0) {
+ act.store = cpumask;
+ act.invalidate = active;
+ act.va = va;
+ act.pde = pde;
+ act.newpde = newpde;
+ smp_rendezvous_cpus(cpumask | active,
+ smp_no_rendevous_barrier, pmap == kernel_pmap ?
+ pmap_update_pde_kernel : pmap_update_pde_user,
+ pmap_update_pde_teardown, &act);
+ } else {
+ if (pmap == kernel_pmap)
+ pmap_kenter_pde(va, newpde);
+ else
+ pde_store(pde, newpde);
+ if ((active & cpumask) != 0)
+ pmap_update_pde_invalidate(va, newpde);
+ }
+ sched_unpin();
+}
#else /* !SMP */
/*
* Normal, non-SMP, 486+ invalidation functions.
@@ -959,6 +1121,18 @@ pmap_invalidate_cache(void)
wbinvd();
}
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+ if (pmap == kernel_pmap)
+ pmap_kenter_pde(va, newpde);
+ else
+ pde_store(pde, newpde);
+ if (pmap == kernel_pmap || pmap->pm_active)
+ pmap_update_pde_invalidate(va, newpde);
+}
#endif /* !SMP */
void
@@ -1833,12 +2007,9 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTL
void
pmap_growkernel(vm_offset_t addr)
{
- struct pmap *pmap;
vm_paddr_t ptppaddr;
vm_page_t nkpg;
pd_entry_t newpdir;
- pt_entry_t *pde;
- boolean_t updated_PTD;
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
if (kernel_vm_end == 0) {
@@ -1880,18 +2051,7 @@ pmap_growkernel(vm_offset_t addr)
newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
- updated_PTD = FALSE;
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
- PG_FRAME))
- updated_PTD = TRUE;
- pde = pmap_pde(pmap, kernel_vm_end);
- pde_store(pde, newpdir);
- }
- mtx_unlock_spin(&allpmaps_lock);
- KASSERT(updated_PTD,
- ("pmap_growkernel: current page table is not in allpmaps"));
+ pmap_kenter_pde(kernel_vm_end, newpdir);
kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
if (kernel_vm_end - 1 >= kernel_map->max_offset) {
kernel_vm_end = kernel_map->max_offset;
@@ -2335,7 +2495,6 @@ static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
pd_entry_t newpde, oldpde;
- pmap_t allpmaps_entry;
pt_entry_t *firstpte, newpte;
vm_paddr_t mptepa;
vm_page_t free, mpte;
@@ -2441,25 +2600,11 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
* processor changing the setting of PG_A and/or PG_M between
* the read above and the store below.
*/
- if (pmap == kernel_pmap) {
- /*
- * A harmless race exists between this loop and the bcopy()
- * in pmap_pinit() that initializes the kernel segment of
- * the new page table directory. Specifically, that bcopy()
- * may copy the new PDE from the PTD to the new page table
- * before this loop updates that new page table.
- */
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
- pde = pmap_pde(allpmaps_entry, va);
- KASSERT(*pde == newpde || (*pde & PG_PTE_PROMOTE) ==
- (oldpde & PG_PTE_PROMOTE),
- ("pmap_demote_pde: pde was %#jx, expected %#jx",
- (uintmax_t)*pde, (uintmax_t)oldpde));
- pde_store(pde, newpde);
- }
- mtx_unlock_spin(&allpmaps_lock);
- } else
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, newpde);
+ else if (pmap == kernel_pmap)
+ pmap_kenter_pde(va, newpde);
+ else
pde_store(pde, newpde);
if (firstpte == PADDR2)
mtx_unlock(&PMAP2mutex);
@@ -2978,7 +3123,6 @@ static void
pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
pd_entry_t newpde;
- pmap_t allpmaps_entry;
pt_entry_t *firstpte, oldpte, pa, *pte;
vm_offset_t oldpteva;
vm_page_t mpte;
@@ -3082,14 +3226,11 @@ setpte:
/*
* Map the superpage.
*/
- if (pmap == kernel_pmap) {
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
- pde = pmap_pde(allpmaps_entry, va);
- pde_store(pde, PG_PS | newpde);
- }
- mtx_unlock_spin(&allpmaps_lock);
- } else
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+ else if (pmap == kernel_pmap)
+ pmap_kenter_pde(va, PG_PS | newpde);
+ else
pde_store(pde, PG_PS | newpde);
pmap_pde_promotions++;
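The extra machinery in the i386 version exists because each process page
directory on i386 carries its own copy of the kernel's PDEs: a kernel-pmap
PDE change must be replicated into every pmap on the allpmaps list.  The
new pmap_kenter_pde() factors that replication out of pmap_growkernel(),
and pmap_update_pde_kernel() repeats the same walk lock-free inside the
rendezvous, where every CPU is already captive.  A condensed sketch of the
replication loop, omitting the KASSERT from the code above:

static void
pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
{
        pd_entry_t *pde;
        pmap_t pmap;

        /* Spin lock: the walk must not be preempted by a rendezvous. */
        mtx_lock_spin(&allpmaps_lock);
        LIST_FOREACH(pmap, &allpmaps, pm_list) {
                pde = pmap_pde(pmap, va);       /* this pmap's copy */
                pde_store(pde, newpde);
        }
        mtx_unlock_spin(&allpmaps_lock);
}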
Modified: stable/7/sys/i386/include/md_var.h
==============================================================================
--- stable/7/sys/i386/include/md_var.h Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/i386/include/md_var.h Sat Apr 10 22:24:03 2010 (r206462)
@@ -73,6 +73,7 @@ extern int szosigcode;
#endif
extern uint32_t *vm_page_dump;
extern int vm_page_dump_size;
+extern int workaround_erratum383;
typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
struct thread;
Modified: stable/7/sys/i386/include/specialreg.h
==============================================================================
--- stable/7/sys/i386/include/specialreg.h Sat Apr 10 22:11:01 2010 (r206461)
+++ stable/7/sys/i386/include/specialreg.h Sat Apr 10 22:24:03 2010 (r206462)
@@ -544,6 +544,7 @@
/* AMD64 MSR's */
#define MSR_EFER 0xc0000080 /* extended features */
#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */
+#define MSR_MC0_CTL_MASK 0xc0010044
/* VIA ACE crypto featureset: for via_feature_rng */
#define VIA_HAS_RNG 1 /* cpu has RNG */