git: 8f6b66a9d3f2 - main - riscv vmm: implement SBI RFNC extension.

From: Ruslan Bukin <br_at_FreeBSD.org>
Date: Tue, 21 Jan 2025 10:35:48 UTC
The branch main has been updated by br:

URL: https://cgit.FreeBSD.org/src/commit/?id=8f6b66a9d3f20d9f32259a7a83fbfb4364c4fafa

commit 8f6b66a9d3f20d9f32259a7a83fbfb4364c4fafa
Author:     Ruslan Bukin <br@FreeBSD.org>
AuthorDate: 2025-01-21 10:15:43 +0000
Commit:     Ruslan Bukin <br@FreeBSD.org>
CommitDate: 2025-01-21 10:35:19 +0000

    riscv vmm: implement SBI RFNC extension.
    
    The RISC-V SBI (Supervisor Binary Interface) RFNC (remote fence) extension
    provides a standardized mechanism for synchronizing instruction fetch and
    address translation across the harts (hardware threads) of a system.
    Specifically, it allows a supervisor (such as an operating system kernel)
    to request, via the SBI, that FENCE.I or SFENCE.VMA operations be executed
    on a set of remote harts.
    
    Differential Revision:  https://reviews.freebsd.org/D48441
---
 sys/conf/files.riscv        |   1 +
 sys/riscv/include/cpufunc.h |  15 ++++
 sys/riscv/vmm/riscv.h       |  22 +++++
 sys/riscv/vmm/vmm_fence.c   | 208 ++++++++++++++++++++++++++++++++++++++++++++
 sys/riscv/vmm/vmm_fence.h   |  43 +++++++++
 sys/riscv/vmm/vmm_riscv.c   |   9 ++
 sys/riscv/vmm/vmm_sbi.c     |  54 +++++++++---
 7 files changed, 339 insertions(+), 13 deletions(-)

diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
index 36eea03f29a1..e84f1367680f 100644
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -86,6 +86,7 @@ riscv/riscv/vm_machdep.c	standard
 riscv/vmm/vmm.c					optional	vmm
 riscv/vmm/vmm_aplic.c				optional	vmm
 riscv/vmm/vmm_dev_machdep.c			optional	vmm
+riscv/vmm/vmm_fence.c				optional	vmm
 riscv/vmm/vmm_instruction_emul.c		optional	vmm
 riscv/vmm/vmm_riscv.c				optional	vmm
 riscv/vmm/vmm_sbi.c				optional	vmm
diff --git a/sys/riscv/include/cpufunc.h b/sys/riscv/include/cpufunc.h
index 8f5b87d24ce3..75b22632c546 100644
--- a/sys/riscv/include/cpufunc.h
+++ b/sys/riscv/include/cpufunc.h
@@ -104,6 +104,21 @@ sfence_vma_page(uintptr_t addr)
 	__asm __volatile("sfence.vma %0" :: "r" (addr) : "memory");
 }
 
+static __inline void
+sfence_vma_asid(uint64_t asid)
+{
+	/* rs1 is x0, so the fence is not limited to one virtual address. */
+	__asm __volatile("sfence.vma x0, %0" :: "r" (asid) : "memory");
+}
+
+static __inline void
+sfence_vma_asid_page(uint64_t asid, uintptr_t addr)
+{
+	/* Note the operand order: address first (rs1), then ASID (rs2). */
+	__asm __volatile("sfence.vma %0, %1" :: "r" (addr), "r" (asid)
+	    : "memory");
+}
+
 #define	rdcycle()			csr_read64(cycle)
 #define	rdtime()			csr_read64(time)
 #define	rdinstret()			csr_read64(instret)
diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h
index f3665d33a386..793c61534cee 100644
--- a/sys/riscv/vmm/riscv.h
+++ b/sys/riscv/vmm/riscv.h
@@ -67,6 +67,20 @@ struct hypcsr {
 	uint64_t senvcfg;
 };
 
+enum vmm_fence_type {
+	VMM_RISCV_FENCE_INVALID = 0,	/* Marks an empty queue slot. */
+	VMM_RISCV_FENCE_I,		/* SBI remote FENCE.I. */
+	VMM_RISCV_FENCE_VMA,		/* SBI remote SFENCE.VMA. */
+	VMM_RISCV_FENCE_VMA_ASID,	/* SBI remote SFENCE.VMA with ASID. */
+};
+
+struct vmm_fence {
+	enum vmm_fence_type type;
+	size_t start;		/* First address of the range to flush. */
+	size_t size;		/* Length of the range, in bytes. */
+	uint64_t asid;		/* Used by VMM_RISCV_FENCE_VMA_ASID only. */
+};
+
 struct hypctx {
 	struct hypregs host_regs;
 	struct hypregs guest_regs;
@@ -82,6 +96,14 @@ struct hypctx {
 	int ipi_pending;
 	int interrupts_pending;
 	struct vtimer vtimer;
+
+	struct vmm_fence *fence_queue;
+	struct mtx fence_queue_mtx;
+	int fence_queue_head;
+	int fence_queue_tail;
+#define	FENCE_REQ_I	(1 << 0)
+#define	FENCE_REQ_VMA	(1 << 1)
+	int fence_req;
 };
 
 struct hyp {
diff --git a/sys/riscv/vmm/vmm_fence.c b/sys/riscv/vmm/vmm_fence.c
new file mode 100644
index 000000000000..6bba3f4b5dfe
--- /dev/null
+++ b/sys/riscv/vmm/vmm_fence.c
@@ -0,0 +1,208 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/bus.h>
+
+#include "riscv.h"
+#include "vmm_fence.h"
+
+/*
+ * Pop the entry at the head of the vcpu's fence queue into *new_fence.
+ * Returns false if the head slot is empty.
+ */
+static bool
+vmm_fence_dequeue(struct hypctx *hypctx, struct vmm_fence *new_fence)
+{
+	struct vmm_fence *slot;
+	bool found;
+
+	mtx_lock_spin(&hypctx->fence_queue_mtx);
+	slot = &hypctx->fence_queue[hypctx->fence_queue_head];
+	found = (slot->type != VMM_RISCV_FENCE_INVALID);
+	if (found) {
+		*new_fence = *slot;
+		slot->type = VMM_RISCV_FENCE_INVALID;
+		hypctx->fence_queue_head =
+		    (hypctx->fence_queue_head + 1) % VMM_FENCE_QUEUE_SIZE;
+	}
+	mtx_unlock_spin(&hypctx->fence_queue_mtx);
+	return (found);
+}
+
+/*
+ * Copy *new_fence into the tail slot of the vcpu's fence queue.
+ * Returns false if that slot is still occupied, i.e. the queue is full.
+ */
+static bool
+vmm_fence_enqueue(struct hypctx *hypctx, struct vmm_fence *new_fence)
+{
+	struct vmm_fence *slot;
+	bool stored;
+
+	mtx_lock_spin(&hypctx->fence_queue_mtx);
+	slot = &hypctx->fence_queue[hypctx->fence_queue_tail];
+	stored = (slot->type == VMM_RISCV_FENCE_INVALID);
+	if (stored) {
+		*slot = *new_fence;
+		hypctx->fence_queue_tail =
+		    (hypctx->fence_queue_tail + 1) % VMM_FENCE_QUEUE_SIZE;
+	}
+	mtx_unlock_spin(&hypctx->fence_queue_mtx);
+	return (stored);
+}
+
+static void
+vmm_fence_process_one(struct vmm_fence *fence)
+{
+	uint64_t va;
+
+	KASSERT(fence->type == VMM_RISCV_FENCE_VMA ||
+	    fence->type == VMM_RISCV_FENCE_VMA_ASID,
+	    ("%s: wrong fence type %d", __func__, fence->type));
+
+	switch (fence->type) {
+	case VMM_RISCV_FENCE_VMA:
+		for (va = fence->start; va < fence->start + fence->size;
+		    va += PAGE_SIZE)
+			sfence_vma_page(va);
+		break;
+	case VMM_RISCV_FENCE_VMA_ASID:
+		if (fence->start == 0 && fence->size == 0)
+			sfence_vma_asid(fence->asid);
+		else
+			for (va = fence->start; va < fence->start + fence->size;
+			    va += PAGE_SIZE)
+				sfence_vma_asid_page(fence->asid, va);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Run all fences pending for this vcpu: FENCE_REQ_* bits, then the queue. */
+void
+vmm_fence_process(struct hypctx *hypctx)
+{
+	struct vmm_fence req;
+	int bits;
+
+	bits = atomic_readandclear_32(&hypctx->fence_req);
+	KASSERT((bits & ~(FENCE_REQ_I | FENCE_REQ_VMA)) == 0,
+	    ("wrong fence bit mask"));
+
+	if ((bits & FENCE_REQ_I) != 0)
+		fence_i();
+	if ((bits & FENCE_REQ_VMA) != 0)
+		sfence_vma();
+
+	/* Ranged fences were queued individually; drain them all. */
+	while (vmm_fence_dequeue(hypctx, &req))
+		vmm_fence_process_one(&req);
+}
+
+void
+vmm_fence_add(struct vm *vm, cpuset_t *cpus, struct vmm_fence *fence)
+{
+	struct hypctx *hypctx;
+	cpuset_t running_cpus;
+	struct vcpu *vcpu;
+	uint16_t maxcpus;
+	int hostcpu;
+	int state;
+	bool enq;
+	int i;
+
+	CPU_ZERO(&running_cpus);
+	/* Post 'fence' to each vcpu in 'cpus'; note which are running. */
+	maxcpus = vm_get_maxcpus(vm);
+	for (i = 0; i < maxcpus; i++) {
+		if (!CPU_ISSET(i, cpus))
+			continue;
+		vcpu = vm_vcpu(vm, i);
+		hypctx = vcpu_get_cookie(vcpu);
+
+		enq = false;
+
+		/* Global fences are flagged in fence_req, not queued. */
+		switch (fence->type) {
+		case VMM_RISCV_FENCE_I:
+			atomic_set_32(&hypctx->fence_req, FENCE_REQ_I);
+			break;
+		case VMM_RISCV_FENCE_VMA:
+			if (fence->start == 0 && fence->size == 0)
+				atomic_set_32(&hypctx->fence_req,
+				    FENCE_REQ_VMA);
+			else
+				enq = true;
+			break;
+		case VMM_RISCV_FENCE_VMA_ASID:
+			enq = true;
+			break;
+		default:
+			KASSERT(0, ("%s: wrong fence type %d", __func__,
+			    fence->type));
+			break;
+		}
+
+		/*
+		 * Try to enqueue. If the queue is full, fall back to the
+		 * more conservative request: a full sfence.vma.
+		 */
+		if (enq)
+			if (vmm_fence_enqueue(hypctx, fence) == false)
+				atomic_set_32(&hypctx->fence_req,
+				    FENCE_REQ_VMA);
+
+		mb();	/* Order the request writes before the state read. */
+
+		state = vcpu_get_state(vcpu, &hostcpu);
+		if (state == VCPU_RUNNING)
+			CPU_SET(hostcpu, &running_cpus);
+	}
+
+	/*
+	 * Interrupt the host CPUs running the target vcpus. On reception of
+	 * the IPI they leave the guest; on re-entry they process the fences.
+	 *
+	 * If vcpu migrates to another cpu right here, it should process
+	 * all fences on entry to the guest as well.
+	 */
+	if (!CPU_EMPTY(&running_cpus))
+		smp_rendezvous_cpus(running_cpus, NULL, NULL, NULL, NULL);
+}
diff --git a/sys/riscv/vmm/vmm_fence.h b/sys/riscv/vmm/vmm_fence.h
new file mode 100644
index 000000000000..05d4466fd634
--- /dev/null
+++ b/sys/riscv/vmm/vmm_fence.h
@@ -0,0 +1,43 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_FENCE_H_
+#define _VMM_FENCE_H_
+
+struct hypctx;
+
+#define	VMM_FENCE_QUEUE_SIZE	128	/* Slots in each vcpu's fence ring. */
+
+void vmm_fence_process(struct hypctx *hypctx);
+void vmm_fence_add(struct vm *vm, cpuset_t *cpus, struct vmm_fence *fence);
+
+#endif /* !_VMM_FENCE_H_ */
diff --git a/sys/riscv/vmm/vmm_riscv.c b/sys/riscv/vmm/vmm_riscv.c
index 6ac945dfa1d0..78250ae7c440 100644
--- a/sys/riscv/vmm/vmm_riscv.c
+++ b/sys/riscv/vmm/vmm_riscv.c
@@ -68,6 +68,7 @@
 
 #include "riscv.h"
 #include "vmm_aplic.h"
+#include "vmm_fence.h"
 #include "vmm_stat.h"
 
 MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
@@ -212,6 +213,11 @@ vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
 	hypctx->vcpu = vcpu1;
 	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
 
+	/* Fence queue. */
+	hypctx->fence_queue = mallocarray(VMM_FENCE_QUEUE_SIZE,
+	    sizeof(struct vmm_fence), M_HYP, M_WAITOK | M_ZERO);
+	mtx_init(&hypctx->fence_queue_mtx, "fence queue", NULL, MTX_SPIN);
+
 	/* sstatus */
 	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
 	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
@@ -659,6 +665,7 @@ vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
 		riscv_set_active_vcpu(hypctx);
 		aplic_flush_hwstate(hypctx);
 		riscv_sync_interrupts(hypctx);
+		vmm_fence_process(hypctx);
 
 		dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
 		    __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
@@ -740,6 +747,8 @@ vmmops_vcpu_cleanup(void *vcpui)
 
 	aplic_cpucleanup(hypctx);
 
+	mtx_destroy(&hypctx->fence_queue_mtx);
+	free(hypctx->fence_queue, M_HYP);
 	free(hypctx, M_HYP);
 }
 
diff --git a/sys/riscv/vmm/vmm_sbi.c b/sys/riscv/vmm/vmm_sbi.c
index 63dcf9b4a7ae..586eb7c4d41c 100644
--- a/sys/riscv/vmm/vmm_sbi.c
+++ b/sys/riscv/vmm/vmm_sbi.c
@@ -58,39 +58,74 @@
 #include <machine/sbi.h>
 
 #include "riscv.h"
+#include "vmm_fence.h"
 
+/*
+ * Handle a call to the SBI RFNC (remote fence) extension: translate the
+ * guest's function id and hart mask into a vmm_fence request and post it to
+ * the selected vcpus.  Returns 0 on success, or -1 if the function id is not
+ * one of the three fences handled here.
+ */
 static int
 vmm_sbi_handle_rfnc(struct vcpu *vcpu, struct hypctx *hypctx)
 {
-	uint64_t hart_mask __unused;
-	uint64_t start __unused;
-	uint64_t size __unused;
-	uint64_t asid __unused;
+	struct vmm_fence fence;
+	uint64_t hart_mask;
+	uint64_t hart_mask_base;
 	uint64_t func_id;
+	struct hyp *hyp;
+	uint16_t maxcpus;
+	cpuset_t cpus;
+	uint64_t bit;
+	int vcpu_id;
+	int i;
 
 	func_id = hypctx->guest_regs.hyp_a[6];
 	hart_mask = hypctx->guest_regs.hyp_a[0];
-	start = hypctx->guest_regs.hyp_a[2];
-	size = hypctx->guest_regs.hyp_a[3];
-	asid = hypctx->guest_regs.hyp_a[4];
+	hart_mask_base = hypctx->guest_regs.hyp_a[1];
 
-	dprintf("%s: %ld hart_mask %lx start %lx size %lx\n", __func__,
-	    func_id, hart_mask, start, size);
+	/* Range and ASID operands are taken from guest registers as-is. */
 
-	/* TODO: implement remote sfence. */
+	fence.start = hypctx->guest_regs.hyp_a[2];
+	fence.size = hypctx->guest_regs.hyp_a[3];
+	fence.asid = hypctx->guest_regs.hyp_a[4];
 
 	switch (func_id) {
 	case SBI_RFNC_REMOTE_FENCE_I:
+		fence.type = VMM_RISCV_FENCE_I;
 		break;
 	case SBI_RFNC_REMOTE_SFENCE_VMA:
+		fence.type = VMM_RISCV_FENCE_VMA;
 		break;
 	case SBI_RFNC_REMOTE_SFENCE_VMA_ASID:
+		fence.type = VMM_RISCV_FENCE_VMA_ASID;
 		break;
 	default:
-		break;
+		return (-1);
+	}
+
+	/* Select target vcpus; a hart_mask_base of -1 selects all of them. */
+
+	CPU_ZERO(&cpus);
+	hyp = hypctx->hyp;
+	maxcpus = vm_get_maxcpus(hyp->vm);
+	for (i = 0; i < maxcpus; i++) {
+		vcpu = vm_vcpu(hyp->vm, i);
+		if (vcpu == NULL)
+			continue;
+		vcpu_id = vcpu_vcpuid(vcpu);
+		if (hart_mask_base != -1UL) {
+			if (vcpu_id < hart_mask_base)
+				continue;
+			/* hart_mask has 64 bits; never shift past them. */
+			bit = vcpu_id - hart_mask_base;
+			if (bit >= 64 || (hart_mask & (1UL << bit)) == 0)
+				continue;
+		}
+		CPU_SET(i, &cpus);
 	}
 
-	hypctx->guest_regs.hyp_a[0] = 0;
+	vmm_fence_add(hyp->vm, &cpus, &fence);
 
 	return (0);
 }
@@ -172,6 +198,7 @@ vmm_sbi_ecall(struct vcpu *vcpu, bool *retu)
 {
 	int sbi_extension_id __unused;
 	struct hypctx *hypctx;
+	int error;
 
 	hypctx = riscv_get_active_vcpu();
 	sbi_extension_id = hypctx->guest_regs.hyp_a[7];
@@ -188,7 +215,8 @@ vmm_sbi_ecall(struct vcpu *vcpu, bool *retu)
 
 	switch (sbi_extension_id) {
 	case SBI_EXT_ID_RFNC:
-		vmm_sbi_handle_rfnc(vcpu, hypctx);
+		error = vmm_sbi_handle_rfnc(vcpu, hypctx);
+		hypctx->guest_regs.hyp_a[0] = error;
 		break;
 	case SBI_EXT_ID_TIME:
 		vmm_sbi_handle_time(vcpu, hypctx);