git: ff50e9d53ff8 - main - bhyve: Add bhyverun and vmexit handlers for arm64

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Wed, 10 Apr 2024 15:19:14 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=ff50e9d53ff836bd6276c9f5a355e0ab03a99c61

commit ff50e9d53ff836bd6276c9f5a355e0ab03a99c61
Author:     Andrew Turner <andrew@freebsd.org>
AuthorDate: 2024-04-03 17:10:41 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-04-10 15:17:56 +0000

    bhyve: Add bhyverun and vmexit handlers for arm64
    
    Reviewed by:    corvink, jhb
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D41006
---
 usr.sbin/bhyve/aarch64/Makefile.inc       |   8 +
 usr.sbin/bhyve/aarch64/bhyverun_machdep.c | 245 ++++++++++++++++++++++++++++
 usr.sbin/bhyve/aarch64/vmexit.c           | 260 ++++++++++++++++++++++++++++++
 usr.sbin/bhyve/bhyverun.c                 |   7 +-
 4 files changed, 517 insertions(+), 3 deletions(-)

diff --git a/usr.sbin/bhyve/aarch64/Makefile.inc b/usr.sbin/bhyve/aarch64/Makefile.inc
new file mode 100644
index 000000000000..2c7a3cac105e
--- /dev/null
+++ b/usr.sbin/bhyve/aarch64/Makefile.inc
@@ -0,0 +1,8 @@
+SRCS+=	\
+	fdt.c		\
+	uart_pl011.c
+
+.PATH:  ${BHYVE_SYSDIR}/sys/arm64/vmm
+SRCS+=	vmm_instruction_emul.c
+
+BHYVE_FDT_SUPPORT=
diff --git a/usr.sbin/bhyve/aarch64/bhyverun_machdep.c b/usr.sbin/bhyve/aarch64/bhyverun_machdep.c
new file mode 100644
index 000000000000..9b0010a78b47
--- /dev/null
+++ b/usr.sbin/bhyve/aarch64/bhyverun_machdep.c
@@ -0,0 +1,245 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "config.h"
+#include "debug.h"
+#include "fdt.h"
+#include "mem.h"
+#include "uart_emul.h"
+
+/* Start of high memory + 1M (see fdt_gpa()) */
+#define	FDT_BASE	0x100000
+#define	FDT_SIZE	(64 * 1024)
+
+/* Start of lowmem + 64K */
+#define	UART_MMIO_BASE	0x10000
+#define	UART_MMIO_SIZE	0x1000
+#define	UART_INTR	32
+
+#define	GIC_DIST_BASE		0x2f000000
+#define	GIC_DIST_SIZE		0x10000
+#define	GIC_REDIST_BASE		0x2f100000
+#define	GIC_REDIST_SIZE(ncpu)	((ncpu) * 2 * PAGE_SIZE_64K)
+
+#define	PCIE_INTR	33
+
+void
+bhyve_init_config(void)
+{
+	init_config();
+
+	/* Defaults set before option parsing: no ACPI tables, 256M of RAM. */
+	set_config_bool("acpi_tables", false);
+	set_config_bool("acpi_tables_in_memory", false);
+	set_config_value("memory.size", "256M");
+}
+
+void
+bhyve_init_vcpu(struct vcpu *vcpu __unused)
+{	/* Intentionally empty: no per-vCPU initialization is done here. */
+}
+
+void
+bhyve_start_vcpu(struct vcpu *vcpu, bool bsp __unused)
+{	/* The bsp flag is ignored: every vCPU is added the same way. */
+	fbsdrun_addcpu(vcpu_id(vcpu));
+}
+
+/*
+ * Load the specified boot code at the beginning of high memory and store the
+ * load address in *elrp.  Any failure terminates bhyve via err(3)/errx(3).
+ */
+static void
+load_bootrom(struct vmctx *ctx, const char *path, uint64_t *elrp)
+{
+	struct stat sb;
+	void *data, *gptr;
+	vm_paddr_t loadaddr;
+	off_t size;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		err(1, "open(%s)", path);
+	if (fstat(fd, &sb) != 0)
+		err(1, "fstat(%s)", path);
+	size = sb.st_size;
+
+	loadaddr = vm_get_highmem_base(ctx);
+	gptr = vm_map_gpa(ctx, loadaddr, round_page(size));
+	if (gptr == NULL)
+		errx(1, "%s: boot code does not fit in guest memory", path);
+
+	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (data == MAP_FAILED)
+		err(1, "mmap(%s)", path);
+	(void)close(fd);
+	memcpy(gptr, data, size);
+	if (munmap(data, size) != 0)
+		err(1, "munmap(%s)", path);
+	*elrp = loadaddr;
+}
+
+/* UART callback: assert UART_INTR on the VM context passed as 'arg'. */
+static void
+mmio_uart_intr_assert(void *arg)
+{
+	struct vmctx *vm = arg;
+	vm_assert_irq(vm, UART_INTR);
+}
+
+/* UART callback: deassert UART_INTR on the VM context passed as 'arg'. */
+static void
+mmio_uart_intr_deassert(void *arg)
+{
+	struct vmctx *vm = arg;
+	vm_deassert_irq(vm, UART_INTR);
+}
+
+/* MMIO handler: forward a guest access to the PL011 model; returns 0. */
+static int
+mmio_uart_mem_handler(struct vcpu *vcpu __unused, int dir,
+    uint64_t addr, int size __unused, uint64_t *val, void *arg1, long arg2)
+{
+	struct uart_pl011_softc *uart = arg1;
+	/* arg2 is the region base; the offset is divided by 4 to get the index. */
+	long regidx = (addr - arg2) >> 2;
+
+	if (dir != MEM_F_WRITE)
+		*val = uart_pl011_read(uart, regidx);
+	else
+		uart_pl011_write(uart, regidx, *val);
+	return (0);
+}
+
+/* Create the console UART; returns false when "console" is not configured. */
+static bool
+init_mmio_uart(struct vmctx *ctx)
+{
+	struct uart_pl011_softc *uart;
+	struct mem_range region;
+	const char *backend;
+	int rc;
+
+	backend = get_config_value("console");
+	if (backend == NULL)
+		return (false);
+
+	uart = uart_pl011_init(mmio_uart_intr_assert, mmio_uart_intr_deassert,
+	    ctx);
+	if (uart_pl011_tty_open(uart, backend) != 0) {
+		EPRINTLN("Unable to initialize backend '%s' for mmio uart",
+		    backend);
+		assert(0);
+	}
+
+	memset(&region, 0, sizeof(region));
+	region.name = "uart";
+	region.base = UART_MMIO_BASE;
+	region.size = UART_MMIO_SIZE;
+	region.flags = MEM_F_RW;
+	region.handler = mmio_uart_mem_handler;
+	region.arg1 = uart;
+	region.arg2 = region.base;
+	rc = register_mem(&region);
+	assert(rc == 0);
+	return (true);
+}
+
+static vm_paddr_t
+fdt_gpa(struct vmctx *ctx)
+{	/* The FDT is placed FDT_BASE bytes above the high memory base. */
+	return (vm_get_highmem_base(ctx) + FDT_BASE);
+}
+
+int
+bhyve_init_platform(struct vmctx *ctx, struct vcpu *bsp)
+{	/* Load the boot ROM, then build the FDT and attach the vGIC. */
+	const char *bootrom;
+	uint64_t elr;
+	int error;
+
+	bootrom = get_config_value("bootrom");
+	if (bootrom == NULL) {
+		warnx("no bootrom specified");
+		return (ENOENT);	/* a "bootrom" value is mandatory */
+	}
+	load_bootrom(ctx, bootrom, &elr);	/* elr = load address */
+	error = vm_set_register(bsp, VM_REG_GUEST_PC, elr);
+	if (error != 0) {
+		warn("vm_set_register(GUEST_PC)");
+		return (error);
+	}
+
+	error = fdt_init(ctx, guest_ncpus, fdt_gpa(ctx), FDT_SIZE);
+	if (error != 0)
+		return (error);
+
+	fdt_add_gic(GIC_DIST_BASE, GIC_DIST_SIZE, GIC_REDIST_BASE,
+	    GIC_REDIST_SIZE(guest_ncpus));
+	error = vm_attach_vgic(ctx, GIC_DIST_BASE, GIC_DIST_SIZE,
+	    GIC_REDIST_BASE, GIC_REDIST_SIZE(guest_ncpus));
+	if (error != 0) {
+		warn("vm_attach_vgic()");
+		return (error);
+	}
+
+	if (init_mmio_uart(ctx))	/* false: no console configured */
+		fdt_add_uart(UART_MMIO_BASE, UART_MMIO_SIZE, UART_INTR);
+	fdt_add_timer();
+	fdt_add_pcie(PCIE_INTR);
+
+	return (0);
+}
+
+/* Finalize the FDT and hand its guest physical address to the BSP in x0. */
+int
+bhyve_init_platform_late(struct vmctx *ctx, struct vcpu *bsp)
+{
+	int error;
+
+	fdt_finalize();
+
+	error = vm_set_register(bsp, VM_REG_GUEST_X0, fdt_gpa(ctx));
+	assert(error == 0);
+	return (0);
+}
diff --git a/usr.sbin/bhyve/aarch64/vmexit.c b/usr.sbin/bhyve/aarch64/vmexit.c
new file mode 100644
index 000000000000..0d328ab4ff85
--- /dev/null
+++ b/usr.sbin/bhyve/aarch64/vmexit.c
@@ -0,0 +1,260 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/cpuset.h>
+
+#include <dev/psci/psci.h>
+#include <dev/psci/smccc.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "config.h"
+#include "debug.h"
+#include "mem.h"
+#include "vmexit.h"
+
+static cpuset_t running_cpumask;
+
+/*
+ * Emulate the guest memory access described by the exit using emulate_mem().
+ * Returns VMEXIT_CONTINUE on success and VMEXIT_ABORT if emulation fails.
+ */
+static int
+vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
+    struct vm_run *vmrun)
+{
+	struct vm_exit *vme;
+	struct vie *vie;
+	int err;
+
+	vme = vmrun->vm_exit;
+	vie = &vme->u.inst_emul.vie;
+	err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
+	    &vme->u.inst_emul.paging);
+	if (err == 0)
+		return (VMEXIT_CONTINUE);
+
+	/* EPRINTLN() terminates the line itself; no "\n" in the format. */
+	if (err == ESRCH) {
+		EPRINTLN("Unhandled memory access to 0x%lx",
+		    vme->u.inst_emul.gpa);
+	}
+	fprintf(stderr, "Failed to emulate instruction ");
+	FPRINTLN(stderr, "at 0x%lx", vme->pc);
+	return (VMEXIT_ABORT);
+}
+
+/*
+ * Handle a suspended VM: the process exits with 0 for reset, 1 for poweroff,
+ * 2 for halt and 100 for any other reason.  Every path ends in exit(3).
+ */
+static int
+vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
+{
+	enum vm_suspend_how reason;
+	int id;
+
+	id = vcpu_id(vcpu);
+	reason = vmrun->vm_exit->u.suspended.how;
+	fbsdrun_deletecpu(id);
+
+	if (reason == VM_SUSPEND_RESET)
+		exit(0);
+	if (reason == VM_SUSPEND_HALT)
+		exit(2);
+	if (reason == VM_SUSPEND_POWEROFF) {
+		if (get_config_bool_default("destroy_on_poweroff", false))
+			vm_destroy(ctx);
+		exit(1);
+	}
+	fprintf(stderr, "vmexit_suspend: invalid reason %d\n", reason);
+	exit(100);
+	return (0);	/* NOTREACHED */
+}
+
+static int
+vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+    struct vm_run *vmrun __unused)
+{
+	return (VMEXIT_CONTINUE);	/* no action is taken for this exit */
+}
+
+static int
+vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+    struct vm_run *vmrun __unused)
+{
+	return (VMEXIT_CONTINUE);	/* no action is taken for this exit */
+}
+
+/*
+ * PSCI AFFINITY_INFO: return ON if any running vCPU matches target_affinity
+ * in the affinity fields at or above lowest_affinity_level, otherwise OFF.
+ */
+static uint64_t
+smccc_affinity_info(uint64_t target_affinity, uint32_t lowest_affinity_level)
+{
+	uint64_t cpu_aff, mask;
+
+	/* Levels above 3 are invalid; lower levels compare more fields. */
+	if (lowest_affinity_level > 3)
+		return (PSCI_RETVAL_INVALID_PARAMS);
+	mask = CPU_AFF3_MASK;
+	if (lowest_affinity_level <= 2)
+		mask |= CPU_AFF2_MASK;
+	if (lowest_affinity_level <= 1)
+		mask |= CPU_AFF1_MASK;
+	if (lowest_affinity_level == 0)
+		mask |= CPU_AFF0_MASK;
+
+	for (int cpu = 0; cpu < guest_ncpus; cpu++) {
+		/* Only vCPUs recorded in running_cpumask can report ON. */
+		if (!CPU_ISSET(cpu, &running_cpumask))
+			continue;
+
+		/* TODO: We should get this from the kernel */
+		cpu_aff = (cpu & 0xf) << MPIDR_AFF0_SHIFT |
+		    ((cpu >> 4) & 0xff) << MPIDR_AFF1_SHIFT |
+		    ((cpu >> 12) & 0xff) << MPIDR_AFF2_SHIFT |
+		    (uint64_t)((cpu >> 20) & 0xff) << MPIDR_AFF3_SHIFT;
+
+		/* Return ON if any CPUs are on */
+		if ((cpu_aff & mask) == (target_affinity & mask))
+			return (PSCI_AFFINITY_INFO_ON);
+	}
+
+	/* No CPUs in the affinity mask are on, return OFF */
+	return (PSCI_AFFINITY_INFO_OFF);
+}
+
+/*
+ * Handle an SMCCC call from the guest.  The PSCI functions below are the only
+ * ones implemented; the result is written to the calling vCPU's x0.
+ */
+static int
+vmexit_smccc(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
+{
+	struct vcpu *newvcpu;
+	struct vm_exit *vme;
+	uint64_t newcpu, smccc_rv;
+	enum vm_suspend_how how;
+	int error;
+
+	/* Return the Unknown Function Identifier by default */
+	smccc_rv = SMCCC_RET_NOT_SUPPORTED;
+
+	vme = vmrun->vm_exit;
+	switch (vme->u.smccc_call.func_id) {
+	case PSCI_FNID_VERSION:
+		/* We implement PSCI 1.0 */
+		smccc_rv = PSCI_VER(1, 0);
+		break;
+	case PSCI_FNID_CPU_SUSPEND:
+	case PSCI_FNID_CPU_OFF:
+		break;
+	case PSCI_FNID_CPU_ON:
+		/* The target is used as a vCPU index: 0 .. guest_ncpus - 1. */
+		newcpu = vme->u.smccc_call.args[0];
+		if (newcpu >= (uint64_t)guest_ncpus) {
+			smccc_rv = PSCI_RETVAL_INVALID_PARAMS;
+			break;
+		}
+		if (CPU_ISSET(newcpu, &running_cpumask)) {
+			smccc_rv = PSCI_RETVAL_ALREADY_ON;
+			break;
+		}
+
+		newvcpu = fbsdrun_vcpu(newcpu);
+		assert(newvcpu != NULL);
+
+		/* Set the context ID */
+		error = vm_set_register(newvcpu, VM_REG_GUEST_X0,
+		    vme->u.smccc_call.args[2]);
+		assert(error == 0);
+
+		/* Set the start program counter */
+		error = vm_set_register(newvcpu, VM_REG_GUEST_PC,
+		    vme->u.smccc_call.args[1]);
+		assert(error == 0);
+
+		vm_resume_cpu(newvcpu);
+		CPU_SET_ATOMIC(newcpu, &running_cpumask);
+		smccc_rv = PSCI_RETVAL_SUCCESS;
+		break;
+	case PSCI_FNID_AFFINITY_INFO:
+		smccc_rv = smccc_affinity_info(vme->u.smccc_call.args[0],
+		    vme->u.smccc_call.args[1]);
+		break;
+	case PSCI_FNID_SYSTEM_OFF:
+	case PSCI_FNID_SYSTEM_RESET:
+		how = vme->u.smccc_call.func_id == PSCI_FNID_SYSTEM_OFF ?
+		    VM_SUSPEND_POWEROFF : VM_SUSPEND_RESET;
+		vm_suspend(ctx, how);
+		break;
+	default:
+		break;
+	}
+
+	error = vm_set_register(vcpu, VM_REG_GUEST_X0, smccc_rv);
+	assert(error == 0);
+	return (VMEXIT_CONTINUE);
+}
+
+/* Print the ESR/FAR of an unhandled exception and return VMEXIT_ABORT. */
+static int
+vmexit_hyp(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+    struct vm_run *vmrun)
+{
+	const struct vm_exit *info = vmrun->vm_exit;
+
+	printf("unhandled exception: esr %#lx, far %#lx\n",
+	    info->u.hyp.esr_el2, info->u.hyp.far_el2);
+	return (VMEXIT_ABORT);
+}
+
+const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
+	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
+	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
+	[VM_EXITCODE_DEBUG] = vmexit_debug,
+	[VM_EXITCODE_SMCCC] = vmexit_smccc,
+	[VM_EXITCODE_HYP] = vmexit_hyp,
+};	/* Indexed by exit code; codes without an entry are left NULL. */
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 8163ab0d810a..b9f00385d9e8 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -561,11 +561,12 @@ do_open(const char *vmname)
 	int error;
 	bool reinit, romboot;
 
-	reinit = romboot = false;
+	reinit = false;
 
 #ifdef __amd64__
-	if (lpc_bootrom())
-		romboot = true;
+	romboot = lpc_bootrom() != NULL;
+#else
+	romboot = true;
 #endif
 
 	error = vm_create(vmname);