git: fa6787221742 - stable/14 - AMD IOMMU driver
Date: Sat, 09 Nov 2024 20:19:15 UTC
The branch stable/14 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=fa6787221742a00d410920a4a917bf2f9b1ed192

commit fa6787221742a00d410920a4a917bf2f9b1ed192
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2024-05-12 10:20:11 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2024-11-09 19:04:18 +0000

    AMD IOMMU driver

    (cherry picked from commit 0f5116d7efe33c81f0b24b56eec78af37898f500)
---
 sys/conf/files.x86          |    6 +
 sys/x86/iommu/amd_cmd.c     |  360 +++++++++++++
 sys/x86/iommu/amd_ctx.c     |  639 +++++++++++++++++++++++
 sys/x86/iommu/amd_drv.c     | 1205 +++++++++++++++++++++++++++++++++++++++++++
 sys/x86/iommu/amd_event.c   |  323 ++++++++++++
 sys/x86/iommu/amd_idpgtbl.c |  396 ++++++++++++++
 sys/x86/iommu/amd_intrmap.c |  391 ++++++++++++++
 sys/x86/iommu/amd_iommu.h   |  243 +++++++++
 8 files changed, 3563 insertions(+)

diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index c6d705e9715d..33da95a65ba4 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -344,6 +344,12 @@ x86/cpufreq/hwpstate_amd.c optional cpufreq
 x86/cpufreq/hwpstate_intel.c optional cpufreq
 x86/cpufreq/p4tcc.c optional cpufreq
 x86/cpufreq/powernow.c optional cpufreq
+x86/iommu/amd_cmd.c optional acpi iommu pci
+x86/iommu/amd_ctx.c optional acpi iommu pci
+x86/iommu/amd_drv.c optional acpi iommu pci
+x86/iommu/amd_event.c optional acpi iommu pci
+x86/iommu/amd_idpgtbl.c optional acpi iommu pci
+x86/iommu/amd_intrmap.c optional acpi iommu pci
 x86/iommu/intel_ctx.c optional acpi iommu pci
 x86/iommu/intel_drv.c optional acpi iommu pci
 x86/iommu/intel_fault.c optional acpi iommu pci
diff --git a/sys/x86/iommu/amd_cmd.c b/sys/x86/iommu/amd_cmd.c
new file mode 100644
index 000000000000..bbc2a8e0ad9f
--- /dev/null
+++ b/sys/x86/iommu/amd_cmd.c
@@ -0,0 +1,360 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include "opt_acpi.h" + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/memdesc.h> +#include <sys/module.h> +#include <sys/rman.h> +#include <sys/taskqueue.h> +#include <sys/time.h> +#include <sys/tree.h> +#include <sys/vmem.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <contrib/dev/acpica/include/acpi.h> +#include <contrib/dev/acpica/include/accommon.h> +#include <dev/acpica/acpivar.h> +#include <dev/pci/pcireg.h> +#include <machine/bus.h> +#include <machine/cpu.h> +#include <x86/include/busdma_impl.h> +#include <dev/iommu/busdma_iommu.h> +#include <x86/iommu/amd_reg.h> +#include <x86/iommu/x86_iommu.h> +#include <x86/iommu/amd_iommu.h> + +static void +amdiommu_enable_cmdbuf(struct amdiommu_unit *unit) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + + unit->hw_ctrl |= AMDIOMMU_CTRL_CMDBUF_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); +} + +static void +amdiommu_disable_cmdbuf(struct amdiommu_unit *unit) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + + unit->hw_ctrl &= ~AMDIOMMU_CTRL_CMDBUF_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); +} + + +static void +amdiommu_enable_qi_intr(struct iommu_unit *iommu) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + unit->hw_ctrl |= AMDIOMMU_CTRL_COMWINT_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); + amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, + AMDIOMMU_CMDEVS_COMWAITINT); +} + +static void +amdiommu_disable_qi_intr(struct iommu_unit *iommu) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + unit->hw_ctrl &= ~AMDIOMMU_CTRL_COMWINT_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); +} + +static void +amdiommu_cmd_advance_tail(struct iommu_unit *iommu) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + amdiommu_write8(unit, AMDIOMMU_CMDBUF_TAIL, unit->x86c.inv_queue_tail); +} + +static void +amdiommu_cmd_ensure(struct iommu_unit *iommu, int descr_count) +{ + struct amdiommu_unit *unit; + uint64_t head; + int bytes; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + bytes = descr_count << AMDIOMMU_CMD_SZ_SHIFT; + for (;;) { + if (bytes <= unit->x86c.inv_queue_avail) + break; + /* refill */ + head = amdiommu_read8(unit, AMDIOMMU_CMDBUF_HEAD); + head &= AMDIOMMU_CMDPTR_MASK; + unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail - + AMDIOMMU_CMD_SZ; + if (head <= unit->x86c.inv_queue_tail) + unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size; + if (bytes <= unit->x86c.inv_queue_avail) + break; + + /* + * No space in the queue, do busy wait. Hardware must + * make a progress. But first advance the tail to + * inform the descriptor streamer about entries we + * might have already filled, otherwise they could + * clog the whole queue.. + * + * See dmar_qi_invalidate_locked() for a discussion + * about data race prevention. 
+ */ + amdiommu_cmd_advance_tail(iommu); + unit->x86c.inv_queue_full++; + cpu_spinwait(); + } + unit->x86c.inv_queue_avail -= bytes; +} + +static void +amdiommu_cmd_emit(struct amdiommu_unit *unit, const struct + amdiommu_cmd_generic *cmd) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + + memcpy(unit->x86c.inv_queue + unit->x86c.inv_queue_tail, cmd, + sizeof(*cmd)); + unit->x86c.inv_queue_tail += AMDIOMMU_CMD_SZ; + KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size, + ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail, + (uintmax_t)unit->x86c.inv_queue_size)); + unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1; +} + +static void +amdiommu_cmd_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, + bool intr, bool memw, bool fence) +{ + struct amdiommu_unit *unit; + struct amdiommu_cmd_completion_wait c; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_COMPLETION_WAIT; + if (memw) { + uint32_t x; + + c.s = 1; + x = unit->x86c.inv_waitd_seq_hw_phys; + x >>= 3; + c.address0 = x; + x = unit->x86c.inv_waitd_seq_hw_phys >> 32; + c.address1 = x; + c.data0 = seq; + } + if (fence) + c.f = 1; + if (intr) + c.i = 1; + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); +} + +static void +amdiommu_qi_invalidate_emit(struct iommu_domain *adomain, iommu_gaddr_t base, + iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + struct amdiommu_cmd_invalidate_iommu_pages c; + u_int isize; + + domain = IODOM2DOM(adomain); + unit = domain->unit; + AMDIOMMU_ASSERT_LOCKED(unit); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES; + c.domainid = domain->domain; + isize = IOMMU_PAGE_SIZE; /* XXXKIB handle superpages */ + + for (; size > 0; base += isize, size -= isize) { + amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); + c.s = 0; + c.pde = 1; + c.address = base >> IOMMU_PAGE_SHIFT; + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); + } + iommu_qi_emit_wait_seq(AMD2IOMMU(unit), pseq, emit_wait); +} + +void +amdiommu_qi_invalidate_all_pages_locked_nowait(struct amdiommu_domain *domain) +{ + struct amdiommu_unit *unit; + struct amdiommu_cmd_invalidate_iommu_pages c; + + unit = domain->unit; + AMDIOMMU_ASSERT_LOCKED(unit); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES; + c.domainid = domain->domain; + + /* + * The magic specified in the note for INVALIDATE_IOMMU_PAGES + * description. 
+ */ + c.s = 1; + c.pde = 1; + c.address = 0x7ffffffffffff; + + amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); +} + +void +amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu) +{ + struct iommu_qi_genseq gseq; + + amdiommu_cmd_ensure(iommu, 1); + iommu_qi_emit_wait_seq(iommu, &gseq, true); + IOMMU2AMD(iommu)->x86c.inv_seq_waiters++; + amdiommu_cmd_advance_tail(iommu); + iommu_qi_wait_for_seq(iommu, &gseq, true); +} + +void +amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx) +{ + struct amdiommu_cmd_invalidate_devtab_entry c; + + amdiommu_cmd_ensure(AMD2IOMMU(CTX2AMD(ctx)), 1); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_DEVTAB_ENTRY; + c.devid = ctx->context.rid; + amdiommu_cmd_emit(CTX2AMD(ctx), (struct amdiommu_cmd_generic *)&c); +} + + +void +amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx) +{ + amdiommu_qi_invalidate_ctx_locked_nowait(ctx); + amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx))); +} + +void +amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit, + uint16_t devid) +{ + struct amdiommu_cmd_invalidate_interrupt_table c; + + AMDIOMMU_ASSERT_LOCKED(unit); + + amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_INTERRUPT_TABLE; + c.devid = devid; + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); +} + +void +amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, uint16_t devid) +{ + amdiommu_qi_invalidate_ir_locked_nowait(unit, devid); + amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(unit)); +} + +static void +amdiommu_qi_task(void *arg, int pending __unused) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(arg); + iommu_qi_drain_tlb_flush(AMD2IOMMU(unit)); + + AMDIOMMU_LOCK(unit); + if (unit->x86c.inv_seq_waiters > 0) + wakeup(&unit->x86c.inv_seq_waiters); + AMDIOMMU_UNLOCK(unit); +} + +int +amdiommu_init_cmd(struct amdiommu_unit *unit) +{ + uint64_t qi_sz, rv; + + unit->x86c.qi_buf_maxsz = ilog2(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE); + unit->x86c.qi_cmd_sz = AMDIOMMU_CMD_SZ; + iommu_qi_common_init(AMD2IOMMU(unit), amdiommu_qi_task); + get_x86_iommu()->qi_ensure = amdiommu_cmd_ensure; + get_x86_iommu()->qi_emit_wait_descr = amdiommu_cmd_emit_wait_descr; + get_x86_iommu()->qi_advance_tail = amdiommu_cmd_advance_tail; + get_x86_iommu()->qi_invalidate_emit = amdiommu_qi_invalidate_emit; + + rv = pmap_kextract((uintptr_t)unit->x86c.inv_queue); + + /* + * See the description of the ComLen encoding for Command + * buffer Base Address Register. 
+ */ + qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE) + 8; + rv |= qi_sz << AMDIOMMU_CMDBUF_BASE_SZSHIFT; + + AMDIOMMU_LOCK(unit); + amdiommu_write8(unit, AMDIOMMU_CMDBUF_BASE, rv); + amdiommu_enable_cmdbuf(unit); + amdiommu_enable_qi_intr(AMD2IOMMU(unit)); + AMDIOMMU_UNLOCK(unit); + + return (0); +} + +static void +amdiommu_fini_cmd_helper(struct iommu_unit *iommu) +{ + amdiommu_disable_cmdbuf(IOMMU2AMD(iommu)); + amdiommu_disable_qi_intr(iommu); +} + +void +amdiommu_fini_cmd(struct amdiommu_unit *unit) +{ + iommu_qi_common_fini(AMD2IOMMU(unit), amdiommu_fini_cmd_helper); +} diff --git a/sys/x86/iommu/amd_ctx.c b/sys/x86/iommu/amd_ctx.c new file mode 100644 index 000000000000..b3e85350a995 --- /dev/null +++ b/sys/x86/iommu/amd_ctx.c @@ -0,0 +1,639 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/bus.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/limits.h> +#include <sys/lock.h> +#include <sys/memdesc.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/rwlock.h> +#include <sys/rman.h> +#include <sys/sysctl.h> +#include <sys/taskqueue.h> +#include <sys/tree.h> +#include <sys/uio.h> +#include <sys/vmem.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> +#include <vm/vm_map.h> +#include <contrib/dev/acpica/include/acpi.h> +#include <contrib/dev/acpica/include/accommon.h> +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> +#include <machine/atomic.h> +#include <machine/bus.h> +#include <machine/md_var.h> +#include <machine/specialreg.h> +#include <x86/include/busdma_impl.h> +#include <dev/iommu/busdma_iommu.h> +#include <x86/iommu/amd_reg.h> +#include <x86/iommu/x86_iommu.h> +#include <x86/iommu/amd_iommu.h> + +static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context"); +static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain"); + +static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit, + struct amdiommu_domain *domain); + +static struct amdiommu_dte * +amdiommu_get_dtep(struct amdiommu_ctx *ctx) +{ + return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]); +} + +void +amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, + bool cansleep) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + + domain = IODOM2DOM(entry->domain); + unit = DOM2AMD(domain); + + /* + * If "free" is false, then the IOTLB invalidation must be performed + * synchronously. Otherwise, the caller might free the entry before + * dmar_qi_task() is finished processing it. + */ + if (free) { + AMDIOMMU_LOCK(unit); + iommu_qi_invalidate_locked(&domain->iodom, entry, true); + AMDIOMMU_UNLOCK(unit); + } else { + iommu_qi_invalidate_sync(&domain->iodom, entry->start, + entry->end - entry->start, cansleep); + iommu_domain_free_entry(entry, false); + } +} + +static bool +amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain, + struct iommu_map_entry *entry) +{ + return (true); /* XXXKIB */ +} + +void +amdiommu_domain_unload(struct iommu_domain *iodom, + struct iommu_map_entries_tailq *entries, bool cansleep) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + struct iommu_map_entry *entry, *entry1; + int error __diagused; + + domain = IODOM2DOM(iodom); + unit = DOM2AMD(domain); + + TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { + KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0, + ("not mapped entry %p %p", domain, entry)); + error = iodom->ops->unmap(iodom, entry, + cansleep ? 
IOMMU_PGF_WAITOK : 0); + KASSERT(error == 0, ("unmap %p error %d", domain, error)); + } + if (TAILQ_EMPTY(entries)) + return; + + AMDIOMMU_LOCK(unit); + while ((entry = TAILQ_FIRST(entries)) != NULL) { + TAILQ_REMOVE(entries, entry, dmamap_link); + iommu_qi_invalidate_locked(&domain->iodom, entry, + amdiommu_domain_unload_emit_wait(domain, entry)); + } + AMDIOMMU_UNLOCK(unit); +} + +static void +amdiommu_domain_destroy(struct amdiommu_domain *domain) +{ + struct iommu_domain *iodom; + struct amdiommu_unit *unit; + + iodom = DOM2IODOM(domain); + + KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries), + ("unfinished unloads %p", domain)); + KASSERT(LIST_EMPTY(&iodom->contexts), + ("destroying dom %p with contexts", domain)); + KASSERT(domain->ctx_cnt == 0, + ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt)); + KASSERT(domain->refs == 0, + ("destroying dom %p with refs %d", domain, domain->refs)); + + if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) { + AMDIOMMU_DOMAIN_LOCK(domain); + iommu_gas_fini_domain(iodom); + AMDIOMMU_DOMAIN_UNLOCK(domain); + } + if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) { + if (domain->pgtbl_obj != NULL) + AMDIOMMU_DOMAIN_PGLOCK(domain); + amdiommu_domain_free_pgtbl(domain); + } + iommu_domain_fini(iodom); + unit = DOM2AMD(domain); + free_unr(unit->domids, domain->domain); + free(domain, M_AMDIOMMU_DOMAIN); +} + +static iommu_gaddr_t +lvl2addr(int lvl) +{ + int x; + + x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl; + /* Level 6 has only 8 bits for page table index */ + if (x >= NBBY * sizeof(uint64_t)) + return (-1ull); + return (1ull < (1ull << x)); +} + +static void +amdiommu_domain_init_pglvl(struct amdiommu_unit *unit, + struct amdiommu_domain *domain) +{ + iommu_gaddr_t end; + int hats, i; + uint64_t efr_hats; + + end = DOM2IODOM(domain)->end; + for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) { + if (lvl2addr(i) >= end && lvl2addr(i - 1) < end) + break; + } + domain->pglvl = i; + + efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK; + switch (efr_hats) { + case AMDIOMMU_EFR_HATS_6LVL: + hats = 6; + break; + case AMDIOMMU_EFR_HATS_5LVL: + hats = 5; + break; + case AMDIOMMU_EFR_HATS_4LVL: + hats = 4; + break; + default: + printf("amdiommu%d: HATS %#jx (reserved) ignoring\n", + unit->iommu.unit, (uintmax_t)efr_hats); + return; + } + if (hats >= domain->pglvl) + return; + + printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n", + unit->iommu.unit, domain->domain, hats, domain->pglvl); + domain->pglvl = hats; + domain->iodom.end = lvl2addr(hats); +} + +static struct amdiommu_domain * +amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped) +{ + struct amdiommu_domain *domain; + struct iommu_domain *iodom; + int error, id; + + id = alloc_unr(unit->domids); + if (id == -1) + return (NULL); + domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO); + iodom = DOM2IODOM(domain); + domain->domain = id; + LIST_INIT(&iodom->contexts); + iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops); + + domain->unit = unit; + + domain->iodom.end = id_mapped ? 
ptoa(Maxmem) : BUS_SPACE_MAXADDR; + amdiommu_domain_init_pglvl(unit, domain); + iommu_gas_init_domain(DOM2IODOM(domain)); + + if (id_mapped) { + domain->iodom.flags |= IOMMU_DOMAIN_IDMAP; + } else { + error = amdiommu_domain_alloc_pgtbl(domain); + if (error != 0) + goto fail; + /* Disable local apic region access */ + error = iommu_gas_reserve_region(iodom, 0xfee00000, + 0xfeefffff + 1, &iodom->msi_entry); + if (error != 0) + goto fail; + } + + return (domain); + +fail: + amdiommu_domain_destroy(domain); + return (NULL); +} + +static struct amdiommu_ctx * +amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid) +{ + struct amdiommu_ctx *ctx; + + ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); + ctx->context.domain = DOM2IODOM(domain); + ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu), + M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); + ctx->context.rid = rid; + ctx->context.refs = 1; + return (ctx); +} + +static void +amdiommu_ctx_link(struct amdiommu_ctx *ctx) +{ + struct amdiommu_domain *domain; + + domain = CTX2DOM(ctx); + IOMMU_ASSERT_LOCKED(domain->iodom.iommu); + KASSERT(domain->refs >= domain->ctx_cnt, + ("dom %p ref underflow %d %d", domain, domain->refs, + domain->ctx_cnt)); + domain->refs++; + domain->ctx_cnt++; + LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link); +} + +static void +amdiommu_ctx_unlink(struct amdiommu_ctx *ctx) +{ + struct amdiommu_domain *domain; + + domain = CTX2DOM(ctx); + IOMMU_ASSERT_LOCKED(domain->iodom.iommu); + KASSERT(domain->refs > 0, + ("domain %p ctx dtr refs %d", domain, domain->refs)); + KASSERT(domain->ctx_cnt >= domain->refs, + ("domain %p ctx dtr refs %d ctx_cnt %d", domain, + domain->refs, domain->ctx_cnt)); + domain->refs--; + domain->ctx_cnt--; + LIST_REMOVE(&ctx->context, link); +} + +struct amdiommu_ctx * +amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid) +{ + struct amdiommu_domain *domain; + struct iommu_ctx *ctx; + + AMDIOMMU_ASSERT_LOCKED(unit); + + LIST_FOREACH(domain, &unit->domains, link) { + LIST_FOREACH(ctx, &domain->iodom.contexts, link) { + if (ctx->rid == rid) + return (IOCTX2CTX(ctx)); + } + } + return (NULL); +} + +struct amdiommu_domain * +amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid) +{ + struct amdiommu_domain *domain; + struct iommu_ctx *ctx; + + AMDIOMMU_LOCK(unit); + LIST_FOREACH(domain, &unit->domains, link) { + LIST_FOREACH(ctx, &domain->iodom.contexts, link) { + if (ctx->rid == rid) + break; + } + } + AMDIOMMU_UNLOCK(unit); + return (domain); +} + +static void +amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx) +{ + struct amdiommu_dte *dtep; + struct amdiommu_domain *domain; + + AMDIOMMU_ASSERT_LOCKED(unit); + KASSERT(ctx->context.refs >= 1, + ("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs)); + + /* + * If our reference is not last, only the dereference should + * be performed. + */ + if (ctx->context.refs > 1) { + ctx->context.refs--; + AMDIOMMU_UNLOCK(unit); + return; + } + + KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0, + ("lost ref on disabled ctx %p", ctx)); + + /* + * Otherwise, the device table entry must be cleared before + * the page table is destroyed. 
+ */ + dtep = amdiommu_get_dtep(ctx); + dtep->v = 0; + atomic_thread_fence_rel(); + memset(dtep, 0, sizeof(*dtep)); + + domain = CTX2DOM(ctx); + amdiommu_qi_invalidate_ctx_locked_nowait(ctx); + amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid); + amdiommu_qi_invalidate_all_pages_locked_nowait(domain); + amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx))); + + if (unit->irte_enabled) + amdiommu_ctx_fini_irte(ctx); + + amdiommu_ctx_unlink(ctx); + free(ctx->context.tag, M_AMDIOMMU_CTX); + free(ctx, M_AMDIOMMU_CTX); + amdiommu_unref_domain_locked(unit, domain); +} + +static void +amdiommu_free_ctx(struct amdiommu_ctx *ctx) +{ + struct amdiommu_unit *unit; + + unit = CTX2AMD(ctx); + AMDIOMMU_LOCK(unit); + amdiommu_free_ctx_locked(unit, ctx); +} + +static void +amdiommu_unref_domain_locked(struct amdiommu_unit *unit, + struct amdiommu_domain *domain) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + KASSERT(domain->refs >= 1, + ("amdiommu%d domain %p refs %u", unit->iommu.unit, domain, + domain->refs)); + KASSERT(domain->refs > domain->ctx_cnt, + ("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit, + domain, domain->refs, domain->ctx_cnt)); + + if (domain->refs > 1) { + domain->refs--; + AMDIOMMU_UNLOCK(unit); + return; + } + + LIST_REMOVE(domain, link); + AMDIOMMU_UNLOCK(unit); + + taskqueue_drain(unit->iommu.delayed_taskqueue, + &domain->iodom.unload_task); + amdiommu_domain_destroy(domain); +} + +static void +dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx, + vm_page_t pgtblr, uint8_t dte, uint32_t edte) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + + domain = CTX2DOM(ctx); + unit = DOM2AMD(domain); + + dtep->tv = 1; + /* dtep->had not used for now */ + dtep->ir = 1; + dtep->iw = 1; + dtep->domainid = domain->domain; + dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS; + + /* fill device interrupt passing hints from IVHD. */ + dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0; + dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0; + dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0; + dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4; + dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0; + dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0; + + if (unit->irte_enabled) { + dtep->iv = 1; + dtep->i = 0; + dtep->inttablen = ilog2(unit->irte_nentries); + dtep->intrroot = pmap_kextract(unit->irte_x2apic ? 
+ (vm_offset_t)ctx->irtx2 : + (vm_offset_t)ctx->irtb) >> 6; + + dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP; + } + + if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) { + dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1; + } else { + MPASS(domain->pglvl > 0 && domain->pglvl <= + AMDIOMMU_PGTBL_MAXLVL); + dtep->pgmode = domain->pglvl; + dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12; + } + + atomic_thread_fence_rel(); + dtep->v = 1; +} + +static void +dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte) +{ + struct amdiommu_dte *dtep; + struct amdiommu_unit *unit; + struct amdiommu_domain *domain; + int i; + + domain = CTX2DOM(ctx); + unit = DOM2AMD(domain); + + dtep = amdiommu_get_dtep(ctx); + KASSERT(dtep->v == 0, + ("amdiommu%d initializing valid dte @%p %#jx", + CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep))); + + if (iommu_is_buswide_ctx(AMD2IOMMU(unit), + PCI_RID2BUS(ctx->context.rid))) { + MPASS(!move); + for (i = 0; i <= PCI_BUSMAX; i++) { + dte_entry_init_one(&dtep[i], ctx, domain->pgtblr, + dte, edte); + } + } else { + dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte); + } +} + +struct amdiommu_ctx * +amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid, + int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte) +{ + struct amdiommu_domain *domain, *domain1; + struct amdiommu_ctx *ctx, *ctx1; + int bus, slot, func; + + if (dev != NULL) { + bus = pci_get_bus(dev); + slot = pci_get_slot(dev); + func = pci_get_function(dev); + } else { + bus = PCI_RID2BUS(rid); + slot = PCI_RID2SLOT(rid); + func = PCI_RID2FUNC(rid); + } + AMDIOMMU_LOCK(unit); + KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) || + (slot == 0 && func == 0), + ("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit, + bus, slot, func)); + ctx = amdiommu_find_ctx_locked(unit, rid); + if (ctx == NULL) { + /* + * Perform the allocations which require sleep or have + * higher chance to succeed if the sleep is allowed. + */ + AMDIOMMU_UNLOCK(unit); + domain1 = amdiommu_domain_alloc(unit, id_mapped); + if (domain1 == NULL) + return (NULL); + if (!id_mapped) { + /* + * XXXKIB IVMD seems to be less significant + * and less used on AMD than RMRR on Intel. + * Not implemented for now. + */ + } + ctx1 = amdiommu_ctx_alloc(domain1, rid); + amdiommu_ctx_init_irte(ctx1); + AMDIOMMU_LOCK(unit); + + /* + * Recheck the contexts, other thread might have + * already allocated needed one. + */ + ctx = amdiommu_find_ctx_locked(unit, rid); + if (ctx == NULL) { + domain = domain1; + ctx = ctx1; + amdiommu_ctx_link(ctx); + ctx->context.tag->owner = dev; + iommu_device_tag_init(CTX2IOCTX(ctx), dev); + + LIST_INSERT_HEAD(&unit->domains, domain, link); + dte_entry_init(ctx, false, dte, edte); + amdiommu_qi_invalidate_ctx_locked(ctx); + if (dev != NULL) { + device_printf(dev, + "amdiommu%d pci%d:%d:%d:%d rid %x domain %d " *** 2653 LINES SKIPPED ***