svn commit: r206162 - in stable/8/sys/sparc64: include sparc64
Marius Strobl
marius at FreeBSD.org
Sun Apr 4 14:57:47 UTC 2010
Author: marius
Date: Sun Apr 4 14:57:46 2010
New Revision: 206162
URL: http://svn.freebsd.org/changeset/base/206162
Log:
MFC: r205269
o Add support for UltraSparc-IV+:
- Swap the configuration of the first and second large dTLB as with
US-IV+ these can only hold entries of certain page sizes each, which
we happened to choose the non-working way around.
- Additionally ensure that the large iTLB is set up to hold 8k pages
(currently this happens to be a NOP though).
- Add a workaround for US-IV+ erratum #2.
- Turn off dTLB parity error reporting as otherwise we get seemingly
false positives when copying in the user window by simulating a
fill trap on return to usermode. Given that these parity errors can
be avoided by disabling multi issue mode and the problem could be
reproduced with a second machine this appears to be a silicon bug of
some sort.
- Add a membar #Sync also before the stores to ASI_DCACHE_TAG. While
at it, turn off interrupts across the whole cheetah_cache_flush() for
simplicity instead of around every flush. This should have next to no
impact as for cheetah-class machines we typically only need to flush
the caches a few times during boot when recovering from peeking/poking
non-existent PCI devices, if at all.
- Just use KERNBASE for FLUSH as we also do elsewhere as the US-IV+
documentation doesn't seem to mention that these CPUs also ignore the
address like previous cheetah-class CPUs do. Again the code changing
LSU_IC is executed seldom enough that the negligible optimization of
using %g0 instead should have no real impact.
With these changes FreeBSD runs stable on V890 equipped with US-IV+
and -j128 buildworlds in a loop for days are no problem. Unfortunately,
the performance isn't where it should be as a buildworld on a 4x1.5GHz
US-IV+ V890 takes nearly 3h while on a V440 with (theoretically) less
powerful 4x1.5GHz US-IIIi it takes just over 1h. It's unclear whether
this is related to the supposed silicon bug mentioned above or due to
another issue. The documentation (which contains a severe bug in the
description of the bits added to the context registers though) at least
doesn't mention any requirements for changes in the CPU handling besides
those implemented and the cache as well as the TLB configurations and
handling look fine.
o Re-arrange cheetah_init() so it's easier to add support for SPARC64
V up to VIIIfx CPUs, which only require parts of this initialization.
Committed from: EH2010
Modified:
stable/8/sys/sparc64/include/dcr.h
stable/8/sys/sparc64/sparc64/cheetah.c
Directory Properties:
stable/8/sys/ (props changed)
stable/8/sys/amd64/include/xen/ (props changed)
stable/8/sys/cddl/contrib/opensolaris/ (props changed)
stable/8/sys/contrib/dev/acpica/ (props changed)
stable/8/sys/contrib/pf/ (props changed)
stable/8/sys/dev/xen/xenpci/ (props changed)
Modified: stable/8/sys/sparc64/include/dcr.h
==============================================================================
--- stable/8/sys/sparc64/include/dcr.h Sun Apr 4 14:57:45 2010 (r206161)
+++ stable/8/sys/sparc64/include/dcr.h Sun Apr 4 14:57:46 2010 (r206162)
@@ -57,6 +57,10 @@
#define DCR_BPM_BITS 2
#define DCR_BPM_MASK \
(((1UL << DCR_BPM_BITS) - 1) << DCR_BPM_SHIFT)
+#define DCR_BPM_1HIST_GSHARE (0UL << DCR_BPM_SHIFT)
+#define DCR_BPM_2HIST_GSHARE (1UL << DCR_BPM_SHIFT)
+#define DCR_BPM_PC (2UL << DCR_BPM_SHIFT)
+#define DCR_BPM_2HIST_MIXED (3UL << DCR_BPM_SHIFT)
#define DCR_JPE (1UL << 15)
#define DCR_ITPE (1UL << 16)
Modified: stable/8/sys/sparc64/sparc64/cheetah.c
==============================================================================
--- stable/8/sys/sparc64/sparc64/cheetah.c Sun Apr 4 14:57:45 2010 (r206161)
+++ stable/8/sys/sparc64/sparc64/cheetah.c Sun Apr 4 14:57:46 2010 (r206162)
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2003 Jake Burkholder.
- * Copyright (c) 2005, 2008 Marius Strobl <marius at FreeBSD.org>
+ * Copyright (c) 2005, 2008, 2010 Marius Strobl <marius at FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,9 +49,6 @@ __FBSDID("$FreeBSD$");
#include <machine/ver.h>
#include <machine/vmparam.h>
-/* A FLUSH is required after changing LSU_IC (the address is ignored). */
-#define CHEETAH_FLUSH_LSU_IC() __asm __volatile("flush %%g0" : :)
-
#define CHEETAH_ICACHE_TAG_LOWER 0x30
/*
@@ -60,6 +57,7 @@ __FBSDID("$FreeBSD$");
void
cheetah_init(u_int cpu_impl)
{
+ u_long val;
register_t s;
/*
@@ -68,14 +66,6 @@ cheetah_init(u_int cpu_impl)
*/
s = intr_disable();
- /*
- * Ensure DCR_IFPOE is disabled as long as we haven't implemented
- * support for it (if ever) as most if not all firmware versions
- * apparently turn it on. Not making use of DCR_IFPOE should also
- * avoid Cheetah erratum #109.
- */
- wr(asr18, rd(asr18) & ~DCR_IFPOE, 0);
-
/* Ensure the TSB Extension Registers hold 0 as TSB_Base. */
stxa(AA_DMMU_TSB_PEXT_REG, ASI_DMMU, 0);
@@ -93,25 +83,58 @@ cheetah_init(u_int cpu_impl)
membar(Sync);
/*
- * Ensure that the dt512_0 is set to hold 8k pages for all three
- * contexts and configure the dt512_1 to hold 4MB pages for them
- * (e.g. for direct mappings).
- * NB: according to documentation, this requires a contex demap
- * _before_ changing the corresponding page size, but we hardly
- * can flush our locked pages here, so we use a demap all instead.
+ * Configure the first large dTLB to hold 4MB pages (e.g. for direct
+ * mappings) for all three contexts and ensure the second one is set
+ * up to hold 8k pages for them. Note that this is constrained by
+ * US-IV+, whose large dTLBs can only hold entries of certain page
+ * sizes each.
+ * For US-IV+, additionally ensure that the large iTLB is set up to
+ * hold 8k pages for nucleus and primary context (still no secondary
+ * iMMU context).
+ * NB: according to documentation, changing the page size of the same
+ * context requires a context demap before changing the corresponding
+ * page size, but we hardly can flush our locked pages here, so we use
+ * a demap all instead.
*/
stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
membar(Sync);
- stxa(AA_DMMU_PCXR, ASI_DMMU,
- (TS_8K << TLB_PCXR_N_PGSZ0_SHIFT) |
- (TS_4M << TLB_PCXR_N_PGSZ1_SHIFT) |
- (TS_8K << TLB_PCXR_P_PGSZ0_SHIFT) |
- (TS_4M << TLB_PCXR_P_PGSZ1_SHIFT));
- stxa(AA_DMMU_SCXR, ASI_DMMU,
- (TS_8K << TLB_SCXR_S_PGSZ0_SHIFT) |
- (TS_4M << TLB_SCXR_S_PGSZ1_SHIFT));
+ val = (TS_4M << TLB_PCXR_N_PGSZ0_SHIFT) |
+ (TS_8K << TLB_PCXR_N_PGSZ1_SHIFT) |
+ (TS_4M << TLB_PCXR_P_PGSZ0_SHIFT) |
+ (TS_8K << TLB_PCXR_P_PGSZ1_SHIFT);
+ if (cpu_impl == CPU_IMPL_ULTRASPARCIVp)
+ val |= (TS_8K << TLB_PCXR_N_PGSZ_I_SHIFT) |
+ (TS_8K << TLB_PCXR_P_PGSZ_I_SHIFT);
+ stxa(AA_DMMU_PCXR, ASI_DMMU, val);
+ val = (TS_4M << TLB_SCXR_S_PGSZ0_SHIFT) |
+ (TS_8K << TLB_SCXR_S_PGSZ1_SHIFT);
+ stxa(AA_DMMU_SCXR, ASI_DMMU, val);
flush(KERNBASE);
+ /*
+ * Ensure DCR_IFPOE is disabled as long as we haven't implemented
+ * support for it (if ever) as most if not all firmware versions
+ * apparently turn it on. Not making use of DCR_IFPOE should also
+ * avoid Cheetah erratum #109.
+ */
+ val = rd(asr18) & ~DCR_IFPOE;
+ if (cpu_impl == CPU_IMPL_ULTRASPARCIVp) {
+ /*
+ * Ensure the branch prediction mode is set to PC indexing
+ * in order to work around US-IV+ erratum #2.
+ */
+ val = (val & ~DCR_BPM_MASK) | DCR_BPM_PC;
+ /*
+ * XXX disable dTLB parity error reporting as otherwise we
+ * get seemingly false positives when copying in the user
+ * window by simulating a fill trap on return to usermode in
+ * case single issue is disabled, which thus appears to be
+ * a CPU bug.
+ */
+ val &= ~DCR_DTPE;
+ }
+ wr(asr18, val, 0);
+
intr_restore(s);
}
@@ -125,11 +148,11 @@ cheetah_cache_enable(u_int cpu_impl)
lsu = ldxa(0, ASI_LSU_CTL_REG);
if (cpu_impl == CPU_IMPL_ULTRASPARCIII) {
- /* Disable P$ due to Cheetah erratum #18. */
+ /* Disable P$ due to US-III erratum #18. */
lsu &= ~LSU_PE;
}
stxa(0, ASI_LSU_CTL_REG, lsu | LSU_IC | LSU_DC);
- CHEETAH_FLUSH_LSU_IC();
+ flush(KERNBASE);
}
/*
@@ -139,21 +162,35 @@ void
cheetah_cache_flush(void)
{
u_long addr, lsu;
+ register_t s;
+ s = intr_disable();
for (addr = 0; addr < PCPU_GET(cache.dc_size);
addr += PCPU_GET(cache.dc_linesize))
- stxa_sync(addr, ASI_DCACHE_TAG, 0);
+ /*
+ * Note that US-IV+ additionally require a membar #Sync before
+ * a load or store to ASI_DCACHE_TAG.
+ */
+ __asm __volatile(
+ "membar #Sync;"
+ "stxa %%g0, [%0] %1;"
+ "membar #Sync"
+ : : "r" (addr), "n" (ASI_DCACHE_TAG));
/* The I$ must be disabled when flushing it so ensure it's off. */
lsu = ldxa(0, ASI_LSU_CTL_REG);
stxa(0, ASI_LSU_CTL_REG, lsu & ~(LSU_IC));
- CHEETAH_FLUSH_LSU_IC();
+ flush(KERNBASE);
for (addr = CHEETAH_ICACHE_TAG_LOWER;
addr < PCPU_GET(cache.ic_size) * 2;
addr += PCPU_GET(cache.ic_linesize) * 2)
- stxa_sync(addr, ASI_ICACHE_TAG, 0);
+ __asm __volatile(
+ "stxa %%g0, [%0] %1;"
+ "membar #Sync"
+ : : "r" (addr), "n" (ASI_ICACHE_TAG));
stxa(0, ASI_LSU_CTL_REG, lsu);
- CHEETAH_FLUSH_LSU_IC();
+ flush(KERNBASE);
+ intr_restore(s);
}
/*
@@ -165,9 +202,11 @@ cheetah_dcache_page_inval(vm_paddr_t spa
vm_paddr_t pa;
void *cookie;
- KASSERT((spa & PAGE_MASK) == 0, ("%s: pa not page aligned", __func__));
+ KASSERT((spa & PAGE_MASK) == 0,
+ ("%s: pa not page aligned", __func__));
cookie = ipi_dcache_page_inval(tl_ipi_cheetah_dcache_page_inval, spa);
- for (pa = spa; pa < spa + PAGE_SIZE; pa += PCPU_GET(cache.dc_linesize))
+ for (pa = spa; pa < spa + PAGE_SIZE;
+ pa += PCPU_GET(cache.dc_linesize))
stxa_sync(pa, ASI_DCACHE_INVALIDATE, 0);
ipi_wait(cookie);
}
More information about the svn-src-stable-8
mailing list