svn commit: r330845 - in head/sys/powerpc: aim ofw powerpc
Nathan Whitehorn
nwhitehorn at FreeBSD.org
Tue Mar 13 15:04:00 UTC 2018
Author: nwhitehorn
Date: Tue Mar 13 15:03:58 2018
New Revision: 330845
URL: https://svnweb.freebsd.org/changeset/base/330845
Log:
Execute PowerPC64/AIM kernel from direct map region when possible.
When the kernel can be in real mode in early boot, we can execute from
high addresses aliased to the kernel's physical memory. If that high
address has its first two bits set to 1 (0xc...), it automatically
falls within the direct map. This reduces page table pressure from
the kernel and sets the kernel up for use with radix translation,
which requires the kernel to run from these high addresses.
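As a rough illustration of the aliasing (not from this commit; the
DMAP_BASE_ADDRESS value below is assumed to be the powerpc64 constant
0xc000000000000000, matching the 0xc... prefix above), moving between a
physical address and its direct-map alias is just a matter of setting
or clearing the top bits, much as the patch does with
kernelstart & ~DMAP_BASE_ADDRESS:

#include <stdint.h>
#include <stdio.h>

/* Illustrative value; the kernel's vmparam.h defines the real constant. */
#define DMAP_BASE_ADDRESS	0xc000000000000000UL

/* Physical address pa is visible in the direct map at DMAP_BASE_ADDRESS | pa. */
static uint64_t phys_to_dmap(uint64_t pa) { return (DMAP_BASE_ADDRESS | pa); }
/* The reverse, mirroring kernelstart & ~DMAP_BASE_ADDRESS in the patch. */
static uint64_t dmap_to_phys(uint64_t va) { return (va & ~DMAP_BASE_ADDRESS); }

int main(void)
{
	uint64_t pa = 0x100000UL;	/* example physical load address */

	printf("direct-map VA: 0x%016jx\n", (uintmax_t)phys_to_dmap(pa));
	printf("physical addr: 0x%016jx\n",
	    (uintmax_t)dmap_to_phys(phys_to_dmap(pa)));
	return (0);
}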
This is accomplished by exploiting the fact that all PowerPC kernels are
built as position-independent executables and relocate themselves at
startup. Before this patch, the kernel ran at a 1:1 VA:PA mapping, with
the VA/PA chosen arbitrarily by the bootloader. Very early, the kernel
processes its ELF relocations so that it can operate wherever it happens
to find itself. This patch uses that mechanism to re-enter and
re-relocate the kernel a second time with a new base address set up in
the early parts of powerpc_init().
Reviewed by: jhibbits
Differential Revision: D14647
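For context on the self-relocation the log describes, the sketch below
is illustrative only (the struct and function names are made up and are
not the kernel's actual relocation routine): a position-independent
image walks its RELA table and patches every R_PPC64_RELATIVE entry
against whatever base it finds itself at. The patch arranges for that to
happen a second time with the base moved up into the direct map.

#include <stddef.h>
#include <stdint.h>

/* Illustrative ELF64 RELA entry layout. */
typedef struct {
	uint64_t r_offset;	/* location to patch, relative to the base */
	uint64_t r_info;	/* symbol index and relocation type */
	int64_t  r_addend;	/* constant to add to the base */
} rela64_t;

#define R_PPC64_RELATIVE	22
#define ELF64_R_TYPE(info)	((info) & 0xffffffff)

/*
 * Patch every RELATIVE entry so the image works at 'base', wherever
 * that happens to be.
 */
static void
self_relocate(rela64_t *rela, size_t count, uintptr_t base)
{
	for (size_t i = 0; i < count; i++) {
		if (ELF64_R_TYPE(rela[i].r_info) != R_PPC64_RELATIVE)
			continue;
		*(uint64_t *)(base + rela[i].r_offset) =
		    base + (uint64_t)rela[i].r_addend;
	}
}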
Modified:
head/sys/powerpc/aim/aim_machdep.c
head/sys/powerpc/aim/locore64.S
head/sys/powerpc/aim/mmu_oea64.c
head/sys/powerpc/ofw/ofwcall64.S
head/sys/powerpc/powerpc/machdep.c
Modified: head/sys/powerpc/aim/aim_machdep.c
==============================================================================
--- head/sys/powerpc/aim/aim_machdep.c Tue Mar 13 15:02:46 2018 (r330844)
+++ head/sys/powerpc/aim/aim_machdep.c Tue Mar 13 15:03:58 2018 (r330845)
@@ -160,15 +160,72 @@ extern void *dlmisstrap, *dlmisssize;
extern void *dsmisstrap, *dsmisssize;
extern void *ap_pcpu;
+extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr);
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+ void *mdp, uint32_t mdp_cookie);
void aim_cpu_init(vm_offset_t toc);
void
+aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
+ uint32_t mdp_cookie)
+{
+ register_t scratch;
+
+ /*
+ * If running from an FDT, make sure we are in real mode to avoid
+ * tromping on firmware page tables. Everything in the kernel assumes
+ * 1:1 mappings out of firmware, so this won't break anything not
+ * already broken. This doesn't work if there is live OF, since OF
+ * may internally use non-1:1 mappings.
+ */
+ if (ofentry == 0)
+ mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+
+#ifdef __powerpc64__
+ /*
+ * If in real mode, relocate to high memory so that the kernel
+ * can execute from the direct map.
+ */
+ if (!(mfmsr() & PSL_DR) &&
+ (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS)
+ __restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
+ DMAP_BASE_ADDRESS, mfmsr());
+#endif
+
+ /* Various very early CPU fix ups */
+ switch (mfpvr() >> 16) {
+ /*
+ * PowerPC 970 CPUs have a misfeature requested by Apple that
+ * makes them pretend they have a 32-byte cacheline. Turn this
+ * off before we measure the cacheline size.
+ */
+ case IBM970:
+ case IBM970FX:
+ case IBM970MP:
+ case IBM970GX:
+ scratch = mfspr(SPR_HID5);
+ scratch &= ~HID5_970_DCBZ_SIZE_HI;
+ mtspr(SPR_HID5, scratch);
+ break;
+ #ifdef __powerpc64__
+ case IBMPOWER7:
+ case IBMPOWER7PLUS:
+ case IBMPOWER8:
+ case IBMPOWER8E:
+ /* XXX: get from ibm,slb-size in device tree */
+ n_slbs = 32;
+ break;
+ #endif
+ }
+}
+
+void
aim_cpu_init(vm_offset_t toc)
{
size_t trap_offset, trapsize;
vm_offset_t trap;
- register_t msr, scratch;
+ register_t msr;
uint8_t *cache_check;
int cacheline_warn;
#ifndef __powerpc64__
@@ -198,32 +255,6 @@ aim_cpu_init(vm_offset_t toc)
* Bits 1-4, 10-15 (ppc32), 33-36, 42-47 (ppc64)
*/
psl_userstatic &= ~0x783f0000UL;
-
- /* Various very early CPU fix ups */
- switch (mfpvr() >> 16) {
- /*
- * PowerPC 970 CPUs have a misfeature requested by Apple that
- * makes them pretend they have a 32-byte cacheline. Turn this
- * off before we measure the cacheline size.
- */
- case IBM970:
- case IBM970FX:
- case IBM970MP:
- case IBM970GX:
- scratch = mfspr(SPR_HID5);
- scratch &= ~HID5_970_DCBZ_SIZE_HI;
- mtspr(SPR_HID5, scratch);
- break;
- #ifdef __powerpc64__
- case IBMPOWER7:
- case IBMPOWER7PLUS:
- case IBMPOWER8:
- case IBMPOWER8E:
- /* XXX: get from ibm,slb-size in device tree */
- n_slbs = 32;
- break;
- #endif
- }
/*
* Initialize the interrupt tables and figure out our cache line
Modified: head/sys/powerpc/aim/locore64.S
==============================================================================
--- head/sys/powerpc/aim/locore64.S Tue Mar 13 15:02:46 2018 (r330844)
+++ head/sys/powerpc/aim/locore64.S Tue Mar 13 15:03:58 2018 (r330845)
@@ -105,7 +105,6 @@ ap_kexec_start: /* At 0x60 past start, copied to 0x60
mtsrr1 %r1
ba EXC_RST
-
/*
* Now start the real text section
*/
@@ -149,9 +148,12 @@ ASENTRY_NOPROF(__start)
subf %r31,%r31,%r2 /* Subtract from real TOC base to get base */
/* Set up the stack pointer */
- ld %r1,TOC_REF(tmpstk)(%r2)
- addi %r1,%r1,TMPSTKSZ-96
- add %r1,%r1,%r31
+ bl 1f
+ .llong tmpstk + TMPSTKSZ - 96 - .
+1: mflr %r30
+ ld %r1,0(%r30)
+ add %r1,%r1,%r30
+ nop
/* Relocate kernel */
std %r3,48(%r1)
@@ -188,5 +190,21 @@ ASENTRY_NOPROF(__start)
/* Unreachable */
b .
+
+ASENTRY_NOPROF(__restartkernel)
+ /*
+ * r3-r7: arguments to go to __start
+ * r8: offset from current kernel address to apply
+ * r9: MSR to set when (atomically) jumping to __start + r8
+ */
+ mtsrr1 %r9
+ bl 1f
+1: mflr %r25
+ add %r25,%r8,%r25
+ addi %r25,%r25,2f-1b
+ mtsrr0 %r25
+ rfid
+2: bl __start
+ nop
#include <powerpc/aim/trap_subr64.S>
Modified: head/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- head/sys/powerpc/aim/mmu_oea64.c Tue Mar 13 15:02:46 2018 (r330844)
+++ head/sys/powerpc/aim/mmu_oea64.c Tue Mar 13 15:03:58 2018 (r330845)
@@ -701,6 +701,7 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
{
int i, j;
vm_size_t physsz, hwphyssz;
+ vm_paddr_t kernelphysstart, kernelphysend;
#ifndef __powerpc64__
/* We don't have a direct map since there is no BAT */
@@ -727,6 +728,9 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
__syncicache((void *)EXC_ISE, 0x80);
#endif
+ kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
+ kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
+
/* Get physical memory regions from firmware */
mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
@@ -764,29 +768,30 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
if (phys_avail[j] < EXC_LAST)
phys_avail[j] += EXC_LAST;
- if (kernelstart >= phys_avail[j] &&
- kernelstart < phys_avail[j+1]) {
- if (kernelend < phys_avail[j+1]) {
+ if (kernelphysstart >= phys_avail[j] &&
+ kernelphysstart < phys_avail[j+1]) {
+ if (kernelphysend < phys_avail[j+1]) {
phys_avail[2*phys_avail_count] =
- (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+ (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
phys_avail[2*phys_avail_count + 1] =
phys_avail[j+1];
phys_avail_count++;
}
- phys_avail[j+1] = kernelstart & ~PAGE_MASK;
+ phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
}
- if (kernelend >= phys_avail[j] &&
- kernelend < phys_avail[j+1]) {
- if (kernelstart > phys_avail[j]) {
+ if (kernelphysend >= phys_avail[j] &&
+ kernelphysend < phys_avail[j+1]) {
+ if (kernelphysstart > phys_avail[j]) {
phys_avail[2*phys_avail_count] = phys_avail[j];
phys_avail[2*phys_avail_count + 1] =
- kernelstart & ~PAGE_MASK;
+ kernelphysstart & ~PAGE_MASK;
phys_avail_count++;
}
- phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+ phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
+ PAGE_SIZE;
}
}
Modified: head/sys/powerpc/ofw/ofwcall64.S
==============================================================================
--- head/sys/powerpc/ofw/ofwcall64.S Tue Mar 13 15:02:46 2018 (r330844)
+++ head/sys/powerpc/ofw/ofwcall64.S Tue Mar 13 15:03:58 2018 (r330845)
@@ -42,7 +42,7 @@
ofwstk:
.space OFWSTKSZ
rtas_regsave:
- .space 24 /* 3 * sizeof(register_t) */
+ .space 32 /* 4 * sizeof(register_t) */
GLOBAL(ofmsr)
.llong 0, 0, 0, 0, 0 /* msr/sprg0-3 used in Open Firmware */
GLOBAL(rtasmsr)
@@ -64,8 +64,8 @@ TOC_ENTRY(rtas_regsave)
*/
ASENTRY_NOPROF(ofwcall)
- mflr %r0
- std %r0,16(%r1)
+ mflr %r8
+ std %r8,16(%r1)
stdu %r1,-208(%r1)
/*
@@ -106,7 +106,7 @@ ASENTRY_NOPROF(ofwcall)
/* Get OF stack pointer */
ld %r7,TOC_REF(ofwstk)(%r2)
- addi %r7,%r7,OFWSTKSZ-32
+ addi %r7,%r7,OFWSTKSZ-40
/*
* Set the MSR to the OF value. This has the side effect of disabling
@@ -129,6 +129,8 @@ ASENTRY_NOPROF(ofwcall)
std %r5,8(%r1) /* Save real stack pointer */
std %r2,16(%r1) /* Save old TOC */
std %r6,24(%r1) /* Save old MSR */
+ std %r8,32(%r1) /* Save high 32-bits of the kernel's PC */
+
li %r5,0
stw %r5,4(%r1)
stw %r5,0(%r1)
@@ -137,15 +139,23 @@ ASENTRY_NOPROF(ofwcall)
mtctr %r4
bctrl
- /* Reload stack pointer and MSR from the OFW stack */
+ /* Reload stack pointer, MSR, and reference PC from the OFW stack */
+ ld %r7,32(%r1)
ld %r6,24(%r1)
ld %r2,16(%r1)
ld %r1,8(%r1)
- /* Now set the real MSR */
- mtmsrd %r6
- isync
+ /* Get back to the MSR/PC we want, using the cached high bits of PC */
+ mtsrr1 %r6
+ clrrdi %r7,%r7,32
+ bl 1f
+1: mflr %r8
+ or %r8,%r8,%r7
+ addi %r8,%r8,2f-1b
+ mtsrr0 %r8
+ rfid /* Turn on MMU, exceptions, and 64-bit mode */
+2:
/* Sign-extend the return value from OF */
extsw %r3,%r3
@@ -186,8 +196,8 @@ ASENTRY_NOPROF(ofwcall)
*/
ASENTRY_NOPROF(rtascall)
- mflr %r0
- std %r0,16(%r1)
+ mflr %r9
+ std %r9,16(%r1)
stdu %r1,-208(%r1)
/*
@@ -248,24 +258,41 @@ ASENTRY_NOPROF(rtascall)
std %r7,0(%r1) /* Save 64-bit stack pointer */
std %r2,8(%r1) /* Save TOC */
std %r6,16(%r1) /* Save MSR */
+ std %r9,24(%r1) /* Save reference PC for high 32 bits */
/* Finally, branch to RTAS */
mtctr %r5
bctrl
/*
- * Reload stack pointer and MSR from the reg save area in r1. We are
- * running in 32-bit mode at this point, so it doesn't matter if r1
+ * Reload stack pointer, MSR, reg PC from the reg save area in r1. We
+ * are running in 32-bit mode at this point, so it doesn't matter if r1
* has become sign-extended.
*/
+ ld %r7,24(%r1)
ld %r6,16(%r1)
ld %r2,8(%r1)
ld %r1,0(%r1)
- /* Now set the real MSR */
- mtmsrd %r6
- isync
+ /*
+ * Get back to the right PC. We need to atomically re-enable
+ * exceptions, 64-bit mode, and the MMU. One thing that has likely
+ * happened is that, if we were running in the high-memory direct
+ * map, we no longer are as a result of LR truncation in RTAS.
+ * Fix this by copying the high-order bits of the LR at function
+ * entry onto the current PC and then jumping there while flipping
+ * all the MSR bits.
+ */
+ mtsrr1 %r6
+ clrrdi %r7,%r7,32
+ bl 1f
+1: mflr %r8
+ or %r8,%r8,%r7
+ addi %r8,%r8,2f-1b
+ mtsrr0 %r8
+ rfid /* Turn on MMU, exceptions, and 64-bit mode */
+2:
/* Sign-extend the return value from RTAS */
extsw %r3,%r3
Modified: head/sys/powerpc/powerpc/machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/machdep.c Tue Mar 13 15:02:46 2018 (r330844)
+++ head/sys/powerpc/powerpc/machdep.c Tue Mar 13 15:03:58 2018 (r330845)
@@ -237,6 +237,8 @@ extern unsigned char __sbss_start[];
extern unsigned char __sbss_end[];
extern unsigned char _end[];
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+ void *mdp, uint32_t mdp_cookie);
void aim_cpu_init(vm_offset_t toc);
void booke_cpu_init(void);
@@ -247,7 +249,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
struct pcpu *pc;
struct cpuref bsp;
vm_offset_t startkernel, endkernel;
- void *kmdp;
char *env;
bool ofw_bootargs = false;
#ifdef DDB
@@ -255,8 +256,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
vm_offset_t ksym_end;
#endif
- kmdp = NULL;
-
/* First guess at start/end kernel positions */
startkernel = __startkernel;
endkernel = __endkernel;
@@ -278,15 +277,7 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
#endif
#ifdef AIM
- /*
- * If running from an FDT, make sure we are in real mode to avoid
- * tromping on firmware page tables. Everything in the kernel assumes
- * 1:1 mappings out of firmware, so this won't break anything not
- * already broken. This doesn't work if there is live OF, since OF
- * may internally use non-1:1 mappings.
- */
- if (ofentry == 0)
- mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+ aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
#endif
/*
@@ -295,14 +286,33 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
* boothowto.
*/
if (mdp != NULL) {
+ void *kmdp = NULL;
+ char *envp = NULL;
+ uintptr_t md_offset = 0;
+ vm_paddr_t kernelendphys;
+
+#ifdef AIM
+ if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
+ md_offset = DMAP_BASE_ADDRESS;
+#endif
+
preload_metadata = mdp;
+ if (md_offset > 0) {
+ preload_metadata += md_offset;
+ preload_bootstrap_relocate(md_offset);
+ }
kmdp = preload_search_by_type("elf kernel");
if (kmdp != NULL) {
boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
- init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *),
- 0);
- endkernel = ulmax(endkernel, MD_FETCH(kmdp,
- MODINFOMD_KERNEND, vm_offset_t));
+ envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
+ if (envp != NULL)
+ envp += md_offset;
+ init_static_kenv(envp, 0);
+ kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
+ vm_offset_t);
+ if (kernelendphys != 0)
+ kernelendphys += md_offset;
+ endkernel = ulmax(endkernel, kernelendphys);
#ifdef DDB
ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);