PERFORCE change 94514 for review
Kip Macy
kmacy at FreeBSD.org
Mon Apr 3 07:19:57 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=94514
Change 94514 by kmacy at kmacy_storage:sun4v_work on 2006/04/03 07:19:10
rework for sun4v
ifdef out sun4u specific code in exception.S
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/mp_exception.S#2 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/mp_locore.S#2 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/mp_machdep.c#2 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/mp_exception.S#2 (text+ko) ====
@@ -44,6 +44,7 @@
/*
* Invalidate a physical page in the data cache. For UltraSPARC I and II.
*/
+#if 0
ENTRY(tl_ipi_spitfire_dcache_page_inval)
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_dcache_page_inval: pa=%#lx"
@@ -153,7 +154,8 @@
IPI_DONE(%g5, %g1, %g2, %g3)
retry
END(tl_ipi_cheetah_dcache_page_inval)
-
+#endif
+
/*
* Trigger a softint at the desired level.
*/
@@ -170,15 +172,16 @@
9:
#endif
- mov 1, %g1
- sllx %g1, %g5, %g1
- wr %g1, 0, %set_softint
+ mov 1, %g2
+ sllx %g2, %g1, %g2
+ wr %g2, 0, %set_softint
retry
END(tl_ipi_level)
/*
* Demap a page from the dtlb and/or itlb.
*/
+#if 0
ENTRY(tl_ipi_tlb_page_demap)
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_tlb_page_demap: pm=%p va=%#lx"
@@ -274,3 +277,4 @@
IPI_DONE(%g5, %g1, %g2, %g3)
retry
END(tl_ipi_tlb_context_demap)
+#endif
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/mp_locore.S#2 (text+ko) ====
@@ -31,7 +31,7 @@
#include <machine/asmacros.h>
#include <machine/ktr.h>
#include <machine/pstate.h>
-#include <machine/upa.h>
+#include <machine/hypervisorvar.h>
#include "assym.s"
@@ -40,46 +40,69 @@
.text
_ALIGN_TEXT
-1: rd %pc, %l0
- ldx [%l0 + (4f-1b)], %l1
- add %l0, (6f-1b), %l2
+
+1: rd %pc, %l0 ! startpc
+ mov %o0, %g1 ! save arg to pass to mp_tramp_func
+ ldx [%l0 + (6f-1b)], %l1 ! read mp_tramp_tte_slots
+ add %l0, (7f-1b), %l2 ! %l2 points to start of slot area
clr %l3
-2: cmp %l3, %l1
+2: cmp %l3, %l1 ! number of slots entered == total?
be %xcc, 3f
- nop
- ldx [%l2 + TTE_VPN], %l4
- ldx [%l2 + TTE_DATA], %l5
- srlx %l4, TV_SIZE_BITS, %l4
- sllx %l4, PAGE_SHIFT_4M, %l4
- wr %g0, ASI_DMMU, %asi
- stxa %l4, [%g0 + AA_DMMU_TAR] %asi
- stxa %l5, [%g0] ASI_DTLB_DATA_IN_REG
- wr %g0, ASI_IMMU, %asi
- stxa %l4, [%g0 + AA_IMMU_TAR] %asi
- stxa %l5, [%g0] ASI_ITLB_DATA_IN_REG
- membar #Sync
- flush %l4
- add %l2, 1 << TTE_SHIFT, %l2
- add %l3, 1, %l3
+ nop
+ ldx [%l2], %o0 ! VA
+ mov 0, %o1 ! ctx0
+ ldx [%l2 + 8], %o2 ! TTE
+ mov MAP_ITLB|MAP_DTLB, %o3
+ mov MAP_PERM_ADDR, %o5
+ ta FAST_TRAP
+#ifdef DEBUG
+ brz %o0, 9f
+ nop
+ ta 0x77
+ ta 0x71
+9:
+#endif
+ add %l2, 16, %l2 ! point %l2 at next slot
+ inc %l3
ba %xcc, 2b
- nop
-3: ldx [%l0 + (5f-1b)], %l1
- jmpl %l1, %g0
- nop
+ nop
+3:
+ ldx [%l0 + (4f-1b)], %o1 ! read mp_tramp_tsb_desc_ra
+ mov 2, %o0
+ mov MMU_TSB_CTX0, %o5 ! set ctx0 TSBs
+ ta FAST_TRAP
+#ifdef DEBUG
+ brz %o0, 9f
+ nop
+ ta 0x77
+ ta 0x71
+9:
+#endif
+ ldx [%l0 + (5f-1b)], %l1 ! fetch mp_tramp_func
+ jmpl %l1, %g0 ! and off we go
+ mov %g1, %o0
_ALIGN_DATA
4: .xword 0x0
5: .xword 0x0
-6:
+6: .xword 0x0
+7:
DATA(mp_tramp_code)
.xword 1b
DATA(mp_tramp_code_len)
- .xword 6b-1b
-DATA(mp_tramp_tlb_slots)
+ .xword 7b-1b
+DATA(mp_tramp_tsb_desc_ra)
.xword 4b-1b
DATA(mp_tramp_func)
.xword 5b-1b
+DATA(mp_tramp_tte_slots)
+ .xword 6b-1b
+
+#define PUTCHAR(x) \
+ mov x, %o0 ; \
+ mov CONS_WRITE, %o5 ; \
+ ta FAST_TRAP
/*
* void mp_startup(void)
*/
@@ -90,18 +113,6 @@
wr %g0, 0, %fprs
SET(cpu_start_args, %l1, %l0)
-
- mov CPU_CLKSYNC, %l1
- membar #StoreLoad
- stw %l1, [%l0 + CSA_STATE]
-
-1: ldx [%l0 + CSA_TICK], %l1
- brz %l1, 1b
- nop
- wrpr %l1, 0, %tick
-
- UPA_GET_MID(%o0)
-
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "mp_start: cpu %d entered kernel"
, %g1, %g2, %g3, 7, 8, 9)
@@ -109,9 +120,6 @@
9:
#endif
- rdpr %ver, %l1
- stx %l1, [%l0 + CSA_VER]
-
/*
* Inform the boot processor we have inited.
*/
@@ -119,59 +127,31 @@
membar #LoadStore
stw %l1, [%l0 + CSA_STATE]
- /*
- * Wait till its our turn to bootstrap.
- */
-2: lduw [%l0 + CSA_MID], %l1
- cmp %l1, %o0
- bne %xcc, 2b
- nop
-
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "_mp_start: cpu %d got start signal"
, %g1, %g2, %g3, 7, 8, 9)
stx %o0, [%g1 + KTR_PARM1]
9:
#endif
-
- add %l0, CSA_TTES, %l1
- clr %l2
-
+2:
+ ld [%l0 + CSA_CPUID], %l1
+ cmp %l1, %o0
+ bne %xcc, 2b
+ nop
+
+ PUTCHAR(0x2e)
/*
- * Map the per-cpu pages.
- */
-3: sllx %l2, TTE_SHIFT, %l3
- add %l1, %l3, %l3
-
- ldx [%l3 + TTE_VPN], %l4
- ldx [%l3 + TTE_DATA], %l5
-
- wr %g0, ASI_DMMU, %asi
- srlx %l4, TV_SIZE_BITS, %l4
- sllx %l4, PAGE_SHIFT_8K, %l4
- stxa %l4, [%g0 + AA_DMMU_TAR] %asi
- stxa %l5, [%g0] ASI_DTLB_DATA_IN_REG
- membar #Sync
-
- add %l2, 1, %l2
- cmp %l2, PCPU_PAGES
- bne %xcc, 3b
- nop
-
- /*
* Get onto our per-cpu panic stack, which precedes the struct pcpu
* in the per-cpu page.
*/
ldx [%l0 + CSA_PCPU], %l1
+ PUTCHAR(0x2e)
set PCPU_PAGES * PAGE_SIZE - PC_SIZEOF, %l2
add %l1, %l2, %l1
sub %l1, SPOFF + CCFSZ, %sp
-
- /*
- * Enable interrupts.
- */
+ PUTCHAR(0x2e)
wrpr %g0, PSTATE_KERNEL, %pstate
-
+ PUTCHAR(0x2e)
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP,
"_mp_start: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx"
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/mp_machdep.c#2 (text+ko) ====
@@ -85,19 +85,21 @@
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/tick.h>
+#include <machine/pstate.h>
#include <machine/tlb.h>
#include <machine/tte.h>
-
-static ih_func_t cpu_ipi_ast;
-static ih_func_t cpu_ipi_stop;
+#include <machine/tte_hash.h>
+#include <machine/tsb.h>
+#include <machine/trap.h>
+#include <machine/hypervisor_api.h>
+#include <machine/asm.h>
/*
* Argument area used to pass data to non-boot processors as they start up.
- * This must be statically initialized with a known invalid upa module id,
- * since the other processors will use it before the boot cpu enters the
- * kernel.
+ * This must be statically initialized with a known invalid cpuid.
+ *
*/
-struct cpu_start_args cpu_start_args = { 0, -1, -1, 0, 0 };
+struct cpu_start_args cpu_start_args = { 0, -1, 0, -1 };
struct ipi_cache_args ipi_cache_args;
struct ipi_tlb_args ipi_tlb_args;
struct pcb stoppcbs[MAXCPU];
@@ -116,7 +118,6 @@
vm_offset_t
mp_tramp_alloc(void)
{
- struct tte *tp;
char *v;
int i;
@@ -124,19 +125,33 @@
if (v == NULL)
panic("mp_tramp_alloc");
bcopy(mp_tramp_code, v, mp_tramp_code_len);
- *(u_long *)(v + mp_tramp_tlb_slots) = kernel_tlb_slots;
+
*(u_long *)(v + mp_tramp_func) = (u_long)mp_startup;
- tp = (struct tte *)(v + mp_tramp_code_len);
- for (i = 0; i < kernel_tlb_slots; i++) {
- tp[i].tte_vpn = TV_VPN(kernel_tlbs[i].te_va, TS_4M);
- tp[i].tte_data = TD_V | TD_4M | TD_PA(kernel_tlbs[i].te_pa) |
- TD_L | TD_CP | TD_CV | TD_P | TD_W;
- }
- for (i = 0; i < PAGE_SIZE; i += sizeof(long))
+
+ for (i = 0; i < PAGE_SIZE; i += sizeof(long)*4 /* XXX L1 cacheline size */)
flush(v + i);
return (vm_offset_t)v;
}
+void
+mp_set_tsb_desc_ra(vm_paddr_t tsb_desc_ra)
+{
+ *(u_long *)(mp_tramp + mp_tramp_tsb_desc_ra) = tsb_desc_ra;
+}
+
+void
+mp_add_nucleus_mapping(vm_offset_t va, tte_t tte_data)
+{
+ static int slot;
+ uint64_t *entry;
+
+ entry = (uint64_t *)(mp_tramp + mp_tramp_code_len + slot*sizeof(*entry)*2);
+ *(entry) = va;
+ *(entry + 1) = tte_data;
+ *(uint64_t *)(mp_tramp + mp_tramp_tte_slots) = slot + 1;
+ slot++;
+}
+
/*
* Probe for other cpus.
*/
@@ -160,17 +175,17 @@
cpus++;
}
mp_maxid = cpus - 1;
+
}
int
cpu_mp_probe(void)
{
-
return (mp_maxid > 0);
}
-
+#if 0
static void
-sun4u_startcpu(phandle_t cpu, void *func, u_long arg)
+start_ap(phandle_t cpu, void *func, u_long arg)
{
static struct {
cell_t name;
@@ -193,13 +208,43 @@
args.arg = (cell_t)arg;
openfirmware(&args);
}
+#else
+static int
+start_ap_bycpuid(int cpuid, void *func, u_long arg)
+{
+ static struct {
+ cell_t name;
+ cell_t nargs;
+ cell_t nreturns;
+ cell_t cpuid;
+ cell_t func;
+ cell_t arg;
+ cell_t result;
+ } args = {
+ (cell_t)"SUNW,start-cpu-by-cpuid",
+ 3,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0
+ };
+ args.cpuid = cpuid;
+ args.func = (cell_t)func;
+ args.arg = (cell_t)arg;
+ openfirmware(&args);
+ return (int)args.result;
+
+}
+#endif
/*
* Stop the calling CPU.
*/
static void
sun4u_stopself(void)
{
+#if 0
static struct {
cell_t name;
cell_t nargs;
@@ -211,6 +256,7 @@
};
openfirmware_exit(&args);
+#endif
panic("sun4u_stopself: failed.");
}
@@ -227,55 +273,53 @@
vm_offset_t va;
char buf[128];
u_int clock;
- int cpuid;
- u_int mid;
+ int cpuid, bp_skipped;
u_long s;
-
+ printf("cpu_mp_start\n");
mtx_init(&ipi_mtx, "ipi", NULL, MTX_SPIN);
- intr_setup(PIL_AST, cpu_ipi_ast, -1, NULL, NULL);
- intr_setup(PIL_RENDEZVOUS, (ih_func_t *)smp_rendezvous_action,
- -1, NULL, NULL);
- intr_setup(PIL_STOP, cpu_ipi_stop, -1, NULL, NULL);
-
root = OF_peer(0);
csa = &cpu_start_args;
+ cpuid = bp_skipped = 0;
for (child = OF_child(root); child != 0; child = OF_peer(child)) {
if (OF_getprop(child, "device_type", buf, sizeof(buf)) <= 0 ||
strcmp(buf, "cpu") != 0)
continue;
- if (OF_getprop(child, "upa-portid", &mid, sizeof(mid)) <= 0 &&
- OF_getprop(child, "portid", &mid, sizeof(mid)) <= 0)
- panic("cpu_mp_start: can't get module id");
- if (mid == mp_boot_mid)
+ /* skip boot processor */
+ if (!bp_skipped) {
+ bp_skipped = 1;
continue;
+ }
+ cpuid++;
+
if (OF_getprop(child, "clock-frequency", &clock,
sizeof(clock)) <= 0)
panic("cpu_mp_start: can't get clock");
csa->csa_state = 0;
- sun4u_startcpu(child, (void *)mp_tramp, 0);
+ start_ap_bycpuid(cpuid, (void *)mp_tramp, (uint64_t)cpuid);
s = intr_disable();
- while (csa->csa_state != CPU_CLKSYNC)
- ;
- membar(StoreLoad);
- csa->csa_tick = rd(tick);
while (csa->csa_state != CPU_INIT)
;
- csa->csa_tick = 0;
intr_restore(s);
- cpuid = mp_ncpus++;
- cpu_identify(csa->csa_ver, clock, cpuid);
+ mp_ncpus = cpuid + 1;
+#if 0
+ cpu_identify(0, clock, cpuid);
+#else
+ printf("cpu%d: UltraSparc T1 Processor (%d.%02d MHz CPU)\n", cpuid,
+ (clock + 4999) / 1000000, ((clock + 4999) / 10000) % 100);
+#endif
va = kmem_alloc(kernel_map, PCPU_PAGES * PAGE_SIZE);
pc = (struct pcpu *)(va + (PCPU_PAGES * PAGE_SIZE)) - 1;
pcpu_init(pc, cpuid, sizeof(*pc));
pc->pc_addr = va;
- pc->pc_mid = mid;
pc->pc_node = child;
all_cpus |= 1 << cpuid;
+ if (mp_ncpus == 8)
+ break;
}
PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
smp_active = 1;
@@ -291,41 +335,26 @@
{
volatile struct cpu_start_args *csa;
struct pcpu *pc;
- vm_offset_t va;
- vm_paddr_t pa;
- u_int ctx_min;
- u_int ctx_inc;
u_long s;
- int i;
- ctx_min = TLB_CTX_USER_MIN;
- ctx_inc = (TLB_CTX_USER_MAX - 1) / mp_ncpus;
csa = &cpu_start_args;
csa->csa_count = mp_ncpus;
+ printf("mp_ncpus=%d\n", mp_ncpus);
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
- pc->pc_tlb_ctx = ctx_min;
- pc->pc_tlb_ctx_min = ctx_min;
- pc->pc_tlb_ctx_max = ctx_min + ctx_inc;
- ctx_min += ctx_inc;
+ if (pc->pc_cpuid == PCPU_GET(cpuid))
+ continue;
- if (pc->pc_cpuid == PCPU_GET(cpuid))
- continue;
KASSERT(pc->pc_idlethread != NULL,
- ("cpu_mp_unleash: idlethread"));
+ ("cpu_mp_unleash: idlethread is NULL"));
pc->pc_curthread = pc->pc_idlethread;
pc->pc_curpcb = pc->pc_curthread->td_pcb;
- for (i = 0; i < PCPU_PAGES; i++) {
- va = pc->pc_addr + i * PAGE_SIZE;
- pa = pmap_kextract(va);
- if (pa == 0)
- panic("cpu_mp_unleash: pmap_kextract\n");
- csa->csa_ttes[i].tte_vpn = TV_VPN(va, TS_8K);
- csa->csa_ttes[i].tte_data = TD_V | TD_8K | TD_PA(pa) |
- TD_L | TD_CP | TD_CV | TD_P | TD_W;
- }
+ pc->pc_curpmap = kernel_pmap;
csa->csa_state = 0;
- csa->csa_pcpu = pc->pc_addr;
- csa->csa_mid = pc->pc_mid;
+#if 1
+ printf("unleashing cpuid=%d\n", pc->pc_cpuid);
+#endif
+ csa->csa_cpuid = pc->pc_cpuid;
+ csa->csa_pcpu = TLB_PHYS_TO_DIRECT(vtophys(pc->pc_addr));
s = intr_disable();
while (csa->csa_state != CPU_BOOTSTRAP)
;
@@ -343,8 +372,11 @@
volatile struct cpu_start_args *csa;
csa = &cpu_start_args;
- pmap_map_tsb();
cpu_setregs(pc);
+ tsb_set_scratchpad_kernel(&kernel_pmap->pm_tsb);
+ tte_hash_set_scratchpad_kernel(kernel_pmap->pm_hash);
+ trap_init();
+ cpu_intrq_init();
tick_start();
smp_cpus++;
@@ -357,12 +389,20 @@
csa->csa_state = CPU_BOOTSTRAP;
while (csa->csa_count != 0)
;
-
+#ifdef SIMULATOR
+ DELAY(300000*PCPU_GET(cpuid));
+#else
+ DELAY(300*PCPU_GET(cpuid));
+#endif
/* ok, now grab sched_lock and enter the scheduler */
+#if 0
+ printf("entering scheduler\n");
+#endif
mtx_lock_spin(&sched_lock);
spinlock_exit();
PCPU_SET(switchtime, cpu_ticks());
PCPU_SET(switchticks, ticks);
+
cpu_throw(NULL, choosethread()); /* doesn't return */
}
@@ -387,12 +427,12 @@
critical_exit();
}
-static void
+void
cpu_ipi_ast(struct trapframe *tf)
{
}
-static void
+void
cpu_ipi_stop(struct trapframe *tf)
{
@@ -411,60 +451,58 @@
}
void
-cpu_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
+cpu_ipi_selected(u_int icpus, u_long d0, u_long d1, u_long d2)
{
- struct pcpu *pc;
- u_int cpu;
+
+ int i, cpu_count, retries;
+ uint16_t *cpulist;
+ u_int cpus;
- while (cpus) {
- cpu = ffs(cpus) - 1;
- cpus &= ~(1 << cpu);
- pc = pcpu_find(cpu);
- cpu_ipi_send(pc->pc_mid, d0, d1, d2);
- }
-}
+ cpulist = PCPU_GET(cpulist);
+ init_mondo(d0, d1, d2);
-void
-cpu_ipi_send(u_int mid, u_long d0, u_long d1, u_long d2)
-{
- u_long s;
- int i;
+ for (cpu_count = 0, i = 0, cpus = icpus & ~PCPU_GET(cpumask); i < 32 && cpus;
+ cpus = cpus >> 1, i++) {
+ if (!(cpus & 0x1))
+ continue;
- KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_BUSY) == 0,
- ("cpu_ipi_send: outstanding dispatch"));
- for (i = 0; i < IPI_RETRIES; i++) {
- s = intr_disable();
- stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
- stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
- stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
- stxa(AA_INTR_SEND | (mid << 14), ASI_SDB_INTR_W, 0);
- membar(Sync);
- while (ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_BUSY)
- ;
- intr_restore(s);
- if ((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_NACK) == 0)
- return;
+ cpulist[cpu_count] = (uint16_t)i;
+ cpu_count++;
+ }
+ retries = 0;
+retry:
+ if (cpu_count) {
+ int error, new_cpu_count;
+ vm_paddr_t cpulist_ra;
+ cpulist_ra = TLB_DIRECT_TO_PHYS((vm_offset_t)cpulist);
+ if ((error = hv_cpu_mondo_send(cpu_count, cpulist_ra)) == H_EWOULDBLOCK) {
+ new_cpu_count = 0;
+ for (i = 0; i < cpu_count; i++) {
+ if (cpulist[i] != 0xffff)
+ cpulist[new_cpu_count++] = cpulist[i];
+ }
+ cpu_count = new_cpu_count;
+ retries++;
+ if (cpu_count == 0) {
+ printf("no more cpus to send to but mondo_send returned EWOULDBLOCK\n");
+ return;
+ }
+ if (retries < 5000)
+ goto retry;
+ }
+ if (error == H_ENOCPU)
+ printf("cpuid==%d not considered valid - cpus=0x%x\n", cpulist[0], icpus);
+ if (error)
+ panic("can't handle error %d from cpu_mondo_send\n", error);
}
- if (
-#ifdef KDB
- kdb_active ||
-#endif
- panicstr != NULL)
- printf("cpu_ipi_send: couldn't send ipi to module %u\n", mid);
- else
- panic("cpu_ipi_send: couldn't send ipi");
+
}
+
void
ipi_selected(u_int cpus, u_int ipi)
{
- cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_level, ipi);
-}
-
-void
-ipi_all(u_int ipi)
-{
- panic("ipi_all");
+ cpu_ipi_selected(cpus, (u_long)tl_ipi_level, ipi, 0);
}
void
More information about the p4-projects
mailing list