git: dfcca210755e - main - kboot: aarch64 trampoline implementation

From: Warner Losh <imp_at_FreeBSD.org>
Date: Fri, 03 Feb 2023 15:50:49 UTC
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=dfcca210755e2c3448ed6094de0331f02ff5176d

commit dfcca210755e2c3448ed6094de0331f02ff5176d
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2023-02-03 15:40:04 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2023-02-03 15:41:40 +0000

    kboot: aarch64 trampoline implementation
    
    Update exec.c (copied from efi/loader/arch/arm64/exec.c) to allow
    execution of aarch64 kernels. This includes a new trampoline code that
    handles copying the UEFI memory map, if available from the Linux FDT
    provided PA. This is a complete implementation now, able to boot from
    the LinuxBoot environment on an aarch64 server that only offers
    LinuxBoot (though a workaround for the gicv3 inability to re-init is not
    yet in FreeBSD). Many 'fit and finish' issues will be addressed in
    subsequent commits.
    
    Sponsored by:           Netflix
    Reviewed by:            tsoome, kevans, andrew
    Differential Revision:  https://reviews.freebsd.org/D38258
---
 stand/kboot/arch/aarch64/exec.c  | 166 ++++++++++++++++++++++++++++++++-------
 stand/kboot/arch/aarch64/tramp.S |  88 ++++++++++++++-------
 2 files changed, 199 insertions(+), 55 deletions(-)

diff --git a/stand/kboot/arch/aarch64/exec.c b/stand/kboot/arch/aarch64/exec.c
index 56a206c0f09f..b0cb2fcbb531 100644
--- a/stand/kboot/arch/aarch64/exec.c
+++ b/stand/kboot/arch/aarch64/exec.c
@@ -34,16 +34,17 @@ __FBSDID("$FreeBSD$");
 #include <sys/linker.h>
 #include <machine/elf.h>
 
-#include <bootstrap.h>
-
 #ifdef EFI
 #include <efi.h>
 #include <efilib.h>
-
 #include "loader_efi.h"
-
+#else
+#include "host_syscall.h"
 #endif
+#include <machine/metadata.h>
 
+#include "bootstrap.h"
+#include "kboot.h"
 #include "bootstrap.h"
 
 #include "platform/acfreebsd.h"
@@ -54,6 +55,10 @@ __FBSDID("$FreeBSD$");
 
 #include "cache.h"
 
+#ifndef EFI
+#define LOADER_PAGE_SIZE PAGE_SIZE
+#endif
+
 #ifdef EFI
 static EFI_GUID acpi_guid = ACPI_TABLE_GUID;
 static EFI_GUID acpi20_guid = ACPI_20_TABLE_GUID;
@@ -62,13 +67,14 @@ static EFI_GUID acpi20_guid = ACPI_20_TABLE_GUID;
 static int elf64_exec(struct preloaded_file *amp);
 static int elf64_obj_exec(struct preloaded_file *amp);
 
-/* Stub out temporarily */
-static int
-bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp,
-    bool exit_bs)
-{
-	return EINVAL;
-}
+bool do_mem_map = false;
+
+extern uint32_t efi_map_size;
+extern vm_paddr_t efi_map_phys_src;	/* From DTB */
+extern vm_paddr_t efi_map_phys_dst;	/* From our memory map metadata module */
+
+int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp,
+    bool exit_bs);
 
 static struct file_format arm64_elf = {
 	elf64_loadfile,
@@ -80,21 +86,47 @@ struct file_format *file_formats[] = {
 	NULL
 };
 
+#ifndef EFI
+extern uintptr_t tramp;
+extern uint32_t tramp_size;
+extern uint32_t tramp_data_offset;
+
+struct trampoline_data {
+	uint64_t	entry;			//  0 (PA where kernel loaded)
+	uint64_t	modulep;		//  8 module metadata
+	uint64_t	memmap_src;		// 16 Linux-provided memory map PA
+	uint64_t	memmap_dst;		// 24 Module data copy PA
+	uint64_t	memmap_len;		// 32 Length to copy
+};
+#endif
+
+extern vm_offset_t kboot_get_phys_load_segment(void);
+
 static int
 elf64_exec(struct preloaded_file *fp)
 {
 	vm_offset_t modulep, kernendp;
-	vm_offset_t clean_addr;
-	size_t clean_size;
-	struct file_metadata *md;
-	Elf_Ehdr *ehdr;
+#ifdef EFI
+	vm_offset_t		clean_addr;
+	size_t			clean_size;
 	void (*entry)(vm_offset_t);
-	int err;
+#else
+	vm_offset_t		trampolinebase;
+	vm_offset_t		staging;
+	void			*trampcode;
+	uint64_t		*trampoline;
+	struct trampoline_data	*trampoline_data;
+	int			nseg;
+	void			*kseg;
+#endif
+	struct file_metadata	*md;
+	Elf_Ehdr		*ehdr;
+	int			error;
 #ifdef EFI
 	ACPI_TABLE_RSDP *rsdp;
 	char buf[24];
 	int revision;
-#endif
+
 	/*
 	 * Report the RSDP to the kernel. The old code used the 'hints' method
 	 * to communite this to the kernel. However, while convenient, the
@@ -103,7 +135,6 @@ elf64_exec(struct preloaded_file *fp)
 	 * that start with acpi. The old 'hints' can be removed before we branch
 	 * for FreeBSD 15.
 	 */
-#ifdef EFI
 	rsdp = efi_get_table(&acpi20_guid);
 	if (rsdp == NULL) {
 		rsdp = efi_get_table(&acpi_guid);
@@ -137,6 +168,46 @@ elf64_exec(struct preloaded_file *fp)
 		}
 	}
 #else
+	vm_offset_t rsdp;
+	rsdp = acpi_rsdp();
+	if (rsdp != 0) {
+		char buf[24];
+
+		printf("Found ACPI 2.0 at %#016lx\n", rsdp);
+		sprintf(buf, "0x%016llx", (unsigned long long)rsdp);
+		setenv("hint.acpi.0.rsdp", buf, 1); /* For 13.1R bootability */
+		setenv("acpi.rsdp", buf, 1);
+		/* Nobody uses the rest of that stuff */
+	}
+
+
+	// XXX Question: why not just use malloc?
+	trampcode = host_getmem(LOADER_PAGE_SIZE);
+	if (trampcode == NULL) {
+		printf("Unable to allocate trampoline\n");
+		return (ENOMEM);
+	}
+	bzero((void *)trampcode, LOADER_PAGE_SIZE);
+	bcopy((void *)&tramp, (void *)trampcode, tramp_size);
+	trampoline = (void *)trampcode;
+
+	/*
+	 * Figure out where to put it.
+	 *
+	 * Linux does not allow us to kexec_load into any part of memory. Ask
+	 * arch_loadaddr to resolve the first available chunk of physical memory
+	 * where loading is possible (load_addr).
+	 *
+	 * The kernel is loaded at the 'base' address in contiguous physical
+	 * memory. We use the 2MB in front of the kernel as a place to put our
+	 * trampoline, but that's really overkill since we only need ~100 bytes.
+	 * The arm64 kernel's entry requirements are only 'load the kernel at a
+	 * 2MB alignment' and it figures out the rest, creates the right page
+	 * tables, etc.
+	 */
+	staging = kboot_get_phys_load_segment();
+	printf("Load address at %#jx\n", (uintmax_t)staging);
+	printf("Relocation offset is %#jx\n", (uintmax_t)elf64_relocation_offset);
 #endif
 
 	if ((md = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL)
@@ -147,33 +218,72 @@ elf64_exec(struct preloaded_file *fp)
 	entry = efi_translate(ehdr->e_entry);
 
 	efi_time_fini();
-#else
-	entry = (void *)ehdr->e_entry;
 #endif
-	err = bi_load(fp->f_args, &modulep, &kernendp, true);
-	if (err != 0) {
+	error = bi_load(fp->f_args, &modulep, &kernendp, true);
+	if (error != 0) {
 #ifdef EFI
 		efi_time_init();
 #endif
-		return (err);
+		return (error);
 	}
 
 	dev_cleanup();
 
-	/* Clean D-cache under kernel area and invalidate whole I-cache */
 #ifdef EFI
+	/* Clean D-cache under kernel area and invalidate whole I-cache */
 	clean_addr = (vm_offset_t)efi_translate(fp->f_addr);
 	clean_size = (vm_offset_t)efi_translate(kernendp) - clean_addr;
-#else
-	clean_addr = (vm_offset_t)fp->f_addr;
-	clean_size = (vm_offset_t)kernendp - clean_addr;
-#endif
 
 	cpu_flush_dcache((void *)clean_addr, clean_size);
 	cpu_inval_icache();
 
 	(*entry)(modulep);
 
+#else
+	/* Linux will flush the caches, just pass this data into our trampoline and go */
+	trampoline_data = (void *)trampoline + tramp_data_offset;
+	memset(trampoline_data, 0, sizeof(*trampoline_data));
+	trampoline_data->entry = ehdr->e_entry - fp->f_addr + staging;
+	trampoline_data->modulep = modulep;
+	printf("Modulep = %jx\n", (uintmax_t)modulep);
+	if (efi_map_phys_src != 0) {
+		md = file_findmetadata(fp, MODINFOMD_EFI_MAP);
+		if (md == NULL || md->md_addr == 0) {
+			printf("Need to copy EFI MAP, but EFI MAP not found. %p\n", md);
+		} else {
+			printf("Metadata EFI map loaded at VA %lx\n", md->md_addr);
+			efi_map_phys_dst = md->md_addr + staging +
+			    roundup2(sizeof(struct efi_map_header), 16) - fp->f_addr;
+			trampoline_data->memmap_src = efi_map_phys_src;
+			trampoline_data->memmap_dst = efi_map_phys_dst;
+			trampoline_data->memmap_len = efi_map_size - roundup2(sizeof(struct efi_map_header), 16);
+			printf("Copying UEFI Memory Map data from %#lx to %#lx %ld bytes\n",
+			    efi_map_phys_src,
+			    trampoline_data->memmap_dst,
+			    trampoline_data->memmap_len);
+		}
+	}
+	/*
+	 * Copy the trampoline to the ksegs. Since we're just bouncing off of
+	 * this into the kernel, no need to preserve the pages. On arm64, the
+	 * kernel sets up the initial page table, so we don't have to preserve
+	 * the memory used for the trampoline past when it calls the kernel.
+	 */
+	printf("kernendp = %#llx\n", (long long)kernendp);
+	trampolinebase = staging + (kernendp - fp->f_addr);
+	printf("trampolinebase = %#llx\n", (long long)trampolinebase);
+	archsw.arch_copyin((void *)trampcode, kernendp, tramp_size);
+	printf("Trampoline bouncing to %#llx\n", (long long)trampoline_data->entry);
+
+	if (archsw.arch_kexec_kseg_get == NULL)
+		panic("architecture did not provide kexec segment mapping");
+	archsw.arch_kexec_kseg_get(&nseg, &kseg);
+	error = host_kexec_load(trampolinebase, nseg, kseg, HOST_KEXEC_ARCH_AARCH64);
+	if (error != 0)
+		panic("kexec_load returned error: %d", error);
+	host_reboot(HOST_REBOOT_MAGIC1, HOST_REBOOT_MAGIC2, HOST_REBOOT_CMD_KEXEC, 0);
+#endif
+
 	panic("exec returned");
 }
 
diff --git a/stand/kboot/arch/aarch64/tramp.S b/stand/kboot/arch/aarch64/tramp.S
index 1edb6823bdc9..9304ca325299 100644
--- a/stand/kboot/arch/aarch64/tramp.S
+++ b/stand/kboot/arch/aarch64/tramp.S
@@ -21,47 +21,81 @@
  * struct trampoline_data {
  *	uint64_t	entry;			//  0 (PA where kernel loaded)
  *	uint64_t	modulep;		//  8 module metadata
+ *	uint64_t	memmap_src;		// 16 Linux-provided memory map PA
+ *	uint64_t	memmap_dst;		// 24 Module data copy PA
+ *	uint64_t	memmap_len;		// 32 Length to copy
  * };
  *
- * The aarch64 _start routine assumes:
+ * FreeBSD's arm64 entry point is _start which assumes:
  *  MMU      on with an identity map, or off
  *  D-Cache: off
  *  I-Cache: on or off
  *  We are loaded at a 2MiB aligned address
  *  Module data (modulep) pointer in x0
  *
- * Unlike EFI, we don't support copying the staging area. We tell Linunx to land
- * the kernel in its final location with the needed alignment, etc.
+ * The rest of the boot loader tells Linux to land the kernel in its final
+ * location with the needed alignment, etc. It does this, and then we take over.
  *
- * This trampoline installs sets up the arguments the kernel expects, flushes
- * the cache lines and jumps to the kernel _start address. We pass the modulep
- * pointer in x0, as _start expects.
+ * The linux kernel will helpfully turn off the MMU, flush the caches, disable
+ * them, etc. It calls the tramp with two args: FDT blob address in x0 and the
+ * EL2 vectors in x1. Currently, we make use of neither of these parameters: we
+ * pass whatever dtb we think we need as part of the module data and we're a bit
+ * weak on hypervisor support at the moment. _start's requirements are all
+ * satisfied.
+ *
+ * This trampoline sets up the arguments the kernel expects and jumps to the
+ * kernel _start address. We pass the modulep pointer in x0, as _start
+ * expects. We assume that the various cache flushing, invalidation, etc that
+ * linux did during or after copying the data down is sufficient, though we may
+ * need to be mindful of cache flushing if we run in EL2 (TBD).
+ *
+ * Note, if TRAMP_MEMMAP_SRC is non-zero, then we have to copy the Linux
+ * provided UEFI memory map. It's easier to do that here. In kboot we couldn't
+ * access the physical memory, and it's a chicken and egg problem later in the
+ * kernel.
  */
+
+#define TRAMP_ENTRY		0
+#define TRAMP_MODULEP		8
+#define TRAMP_MEMMAP_SRC	16
+#define TRAMP_MEMMAP_DST	24
+#define TRAMP_MEMMAP_LEN	32
+#define TRAMP_TOTAL		40
+
 	.text
-	.globl	aarch64_tramp
-aarch64_tramp:
-	b	1f		/* skip over our saved args */
-	.p2align	3
-trampoline_data:
-#define TRAMP_ENTRY	0
-#define TRAMP_MODULEP	8
-#define TRAMP_TOTAL	16
-	.space TRAMP_TOTAL
-#define TMPSTACKSIZE	48	/* 16 bytes for args +8 for pushq/popfq + 24 spare */
+	.globl	tramp
+tramp:
+	adr	x8, trampoline_data
+	ldr	x10, [x8, #TRAMP_MEMMAP_SRC]
+	cmp	x10, xzr
+	b.eq	9f
+
+	/*
+	 * Copy over the memory map into area we have reserved for it. Assume
+	 * the copy is a multiple of 8, since we know table entries are made up
+	 * of several 64-bit quantities.
+	 */
+	ldp	x11, x12, [x8, #TRAMP_MEMMAP_DST]	/* x12 = len */
 1:
-	adr	x2, trampoline_data
-	ldr	x1, [x2, #TRAMP_ENTRY]
-	ldr	x0, [x2, #TRAMP_MODULEP]
-	br	x1
+	ldr	x13, [x10], #8
+	str	x13, [x11], #8
+	subs	x12, x12, #8
+	b.hi	1b
+9:
+	ldp	x9, x0, [x8, #TRAMP_ENTRY]		/* x0 = modulep */
+	br	x9
 
 	.p2align 4
+trampoline_data:
+	.space TRAMP_TOTAL
+#define TMPSTACKSIZE	48	/* 16 bytes for args +8 for pushq/popfq + 24 spare */
 	.space	TMPSTACKSIZE
-aarch64_tramp_end:			/* padding doubles as stack */
+tramp_end:			/* padding doubles as stack */
 
 	.data
-	.globl	aarch64_tramp_size
-aarch64_tramp_size:
-	.long	aarch64_tramp_end-aarch64_tramp
-	.globl	aarch64_tramp_data_offset
-aarch64_tramp_data_offset:
-	.long	trampoline_data-aarch64_tramp
+	.globl	tramp_size
+tramp_size:
+	.long	tramp_end-tramp
+	.globl	tramp_data_offset
+tramp_data_offset:
+	.long	trampoline_data-tramp