git: ad794e6d7d02 - main - x86 iommu: move DMAR-independent parts of the qi code into common
Date: Wed, 04 Sep 2024 21:50:41 UTC
The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=ad794e6d7d02a11b01e721859e096efeb258a4d4 commit ad794e6d7d02a11b01e721859e096efeb258a4d4 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2024-06-06 01:16:36 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-09-04 21:50:18 +0000 x86 iommu: move DMAR-independent parts of the qi code into common Sponsored by: Advanced Micro Devices (AMD) Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/x86/iommu/intel_ctx.c | 7 +- sys/x86/iommu/intel_dmar.h | 42 +----- sys/x86/iommu/intel_drv.c | 26 ++-- sys/x86/iommu/intel_qi.c | 330 ++++++++++++-------------------------------- sys/x86/iommu/iommu_utils.c | 234 ++++++++++++++++++++++++++++++- sys/x86/iommu/x86_iommu.h | 72 ++++++++++ 6 files changed, 413 insertions(+), 298 deletions(-) diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index a3ff35dc527e..03ef196c4cb0 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -887,10 +887,11 @@ dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free, if (unit->qi_enabled) { if (free) { DMAR_LOCK(unit); - dmar_qi_invalidate_locked(domain, entry, true); + iommu_qi_invalidate_locked(&domain->iodom, entry, + true); DMAR_UNLOCK(unit); } else { - dmar_qi_invalidate_sync(domain, entry->start, + iommu_qi_invalidate_sync(&domain->iodom, entry->start, entry->end - entry->start, cansleep); dmar_domain_free_entry(entry, false); } @@ -943,7 +944,7 @@ dmar_domain_unload(struct iommu_domain *iodom, DMAR_LOCK(unit); while ((entry = TAILQ_FIRST(entries)) != NULL) { TAILQ_REMOVE(entries, entry, dmamap_link); - dmar_qi_invalidate_locked(domain, entry, + iommu_qi_invalidate_locked(&domain->iodom, entry, dmar_domain_unload_emit_wait(domain, entry)); } DMAR_UNLOCK(unit); diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h index 0ede955e12b9..8a815d5cfca6 100644 --- a/sys/x86/iommu/intel_dmar.h +++ b/sys/x86/iommu/intel_dmar.h @@ -123,6 +123,7 @@ struct dmar_msi_data { struct dmar_unit { struct iommu_unit iommu; + struct x86_unit_common x86c; uint16_t segment; uint64_t base; @@ -155,17 +156,6 @@ struct dmar_unit { /* QI */ int qi_enabled; - char *inv_queue; - vm_size_t inv_queue_size; - uint32_t inv_queue_avail; - uint32_t inv_queue_tail; - volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait - descr completion */ - uint64_t inv_waitd_seq_hw_phys; - uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */ - u_int inv_waitd_gen; /* seq number generation AKA seq overflows */ - u_int inv_seq_waiters; /* count of waiters for seq */ - u_int inv_queue_full; /* informational counter */ /* IR */ int ir_enabled; @@ -173,36 +163,6 @@ struct dmar_unit { dmar_irte_t *irt; u_int irte_cnt; vmem_t *irtids; - - /* - * Delayed freeing of map entries queue processing: - * - * tlb_flush_head and tlb_flush_tail are used to implement a FIFO - * queue that supports concurrent dequeues and enqueues. However, - * there can only be a single dequeuer (accessing tlb_flush_head) and - * a single enqueuer (accessing tlb_flush_tail) at a time. Since the - * unit's qi_task is the only dequeuer, it can access tlb_flush_head - * without any locking. In contrast, there may be multiple enqueuers, - * so the enqueuers acquire the iommu unit lock to serialize their - * accesses to tlb_flush_tail. 
- * - * In this FIFO queue implementation, the key to enabling concurrent - * dequeues and enqueues is that the dequeuer never needs to access - * tlb_flush_tail and the enqueuer never needs to access - * tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail - * are never NULL, so neither a dequeuer nor an enqueuer ever needs to - * update both. Instead, tlb_flush_head always points to a "zombie" - * struct, which previously held the last dequeued item. Thus, the - * zombie's next field actually points to the struct holding the first - * item in the queue. When an item is dequeued, the current zombie is - * finally freed, and the struct that held the just dequeued item - * becomes the new zombie. When the queue is empty, tlb_flush_tail - * also points to the zombie. - */ - struct iommu_map_entry *tlb_flush_head; - struct iommu_map_entry *tlb_flush_tail; - struct task qi_task; - struct taskqueue *qi_taskqueue; }; #define DMAR_LOCK(dmar) mtx_lock(&DMAR2IOMMU(dmar)->lock) diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c index 79350358cced..9fa1b3f98dc6 100644 --- a/sys/x86/iommu/intel_drv.c +++ b/sys/x86/iommu/intel_drv.c @@ -1303,19 +1303,19 @@ dmar_print_one(int idx, bool show_domains, bool show_mappings) "size 0x%jx\n" " head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n" " hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n", - (uintmax_t)unit->inv_queue, + (uintmax_t)unit->x86c.inv_queue, (uintmax_t)dmar_read8(unit, DMAR_IQA_REG), - (uintmax_t)unit->inv_queue_size, + (uintmax_t)unit->x86c.inv_queue_size, dmar_read4(unit, DMAR_IQH_REG), dmar_read4(unit, DMAR_IQT_REG), - unit->inv_queue_avail, + unit->x86c.inv_queue_avail, dmar_read4(unit, DMAR_ICS_REG), dmar_read4(unit, DMAR_IECTL_REG), - unit->inv_waitd_seq_hw, - &unit->inv_waitd_seq_hw, - (uintmax_t)unit->inv_waitd_seq_hw_phys, - unit->inv_waitd_seq, - unit->inv_waitd_gen); + unit->x86c.inv_waitd_seq_hw, + &unit->x86c.inv_waitd_seq_hw, + (uintmax_t)unit->x86c.inv_waitd_seq_hw_phys, + unit->x86c.inv_waitd_seq, + unit->x86c.inv_waitd_gen); } else { db_printf("qi is disabled\n"); } @@ -1368,7 +1368,17 @@ dmar_find_method(device_t dev, bool verbose) return (&dmar->iommu); } +static struct x86_unit_common * +dmar_get_x86_common(struct iommu_unit *unit) +{ + struct dmar_unit *dmar; + + dmar = IOMMU2DMAR(unit); + return (&dmar->x86c); +} + static struct x86_iommu dmar_x86_iommu = { + .get_x86_common = dmar_get_x86_common, .domain_unload_entry = dmar_domain_unload_entry, .domain_unload = dmar_domain_unload, .get_ctx = dmar_get_ctx, diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c index 590cbac9bcbd..a94fbb54e7f7 100644 --- a/sys/x86/iommu/intel_qi.c +++ b/sys/x86/iommu/intel_qi.c @@ -58,17 +58,6 @@ #include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> -static bool -dmar_qi_seq_processed(const struct dmar_unit *unit, - const struct iommu_qi_genseq *pseq) -{ - u_int gen; - - gen = unit->inv_waitd_gen; - return (pseq->gen < gen || - (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw)); -} - static int dmar_enable_qi(struct dmar_unit *unit) { @@ -96,32 +85,36 @@ dmar_disable_qi(struct dmar_unit *unit) } static void -dmar_qi_advance_tail(struct dmar_unit *unit) +dmar_qi_advance_tail(struct iommu_unit *iommu) { + struct dmar_unit *unit; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); - dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail); + dmar_write4(unit, DMAR_IQT_REG, unit->x86c.inv_queue_tail); } static void -dmar_qi_ensure(struct dmar_unit *unit, int descr_count) 
+dmar_qi_ensure(struct iommu_unit *iommu, int descr_count) { + struct dmar_unit *unit; uint32_t head; int bytes; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT; for (;;) { - if (bytes <= unit->inv_queue_avail) + if (bytes <= unit->x86c.inv_queue_avail) break; /* refill */ head = dmar_read4(unit, DMAR_IQH_REG); head &= DMAR_IQH_MASK; - unit->inv_queue_avail = head - unit->inv_queue_tail - + unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail - DMAR_IQ_DESCR_SZ; - if (head <= unit->inv_queue_tail) - unit->inv_queue_avail += unit->inv_queue_size; - if (bytes <= unit->inv_queue_avail) + if (head <= unit->x86c.inv_queue_tail) + unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size; + if (bytes <= unit->x86c.inv_queue_avail) break; /* @@ -134,11 +127,11 @@ dmar_qi_ensure(struct dmar_unit *unit, int descr_count) * See dmar_qi_invalidate_locked() for a discussion * about data race prevention. */ - dmar_qi_advance_tail(unit); - unit->inv_queue_full++; + dmar_qi_advance_tail(DMAR2IOMMU(unit)); + unit->x86c.inv_queue_full++; cpu_spinwait(); } - unit->inv_queue_avail -= bytes; + unit->x86c.inv_queue_avail -= bytes; } static void @@ -146,162 +139,60 @@ dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2) { DMAR_ASSERT_LOCKED(unit); - *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1; - unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; - KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, - ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, - (uintmax_t)unit->inv_queue_size)); - unit->inv_queue_tail &= unit->inv_queue_size - 1; - *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2; - unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; - KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, - ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, - (uintmax_t)unit->inv_queue_size)); - unit->inv_queue_tail &= unit->inv_queue_size - 1; + *(volatile uint64_t *)(unit->x86c.inv_queue + + unit->x86c.inv_queue_tail) = data1; + unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; + KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size, + ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail, + (uintmax_t)unit->x86c.inv_queue_size)); + unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1; + *(volatile uint64_t *)(unit->x86c.inv_queue + + unit->x86c.inv_queue_tail) = data2; + unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; + KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size, + ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail, + (uintmax_t)unit->x86c.inv_queue_size)); + unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1; } static void -dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr, +dmar_qi_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr, bool memw, bool fence) { + struct dmar_unit *unit; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID | (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) | (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) | (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) | (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0), - memw ? 
unit->inv_waitd_seq_hw_phys : 0); -} - -static void -dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct iommu_qi_genseq *pseq, - bool emit_wait) -{ - struct iommu_qi_genseq gsec; - uint32_t seq; - - KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); - DMAR_ASSERT_LOCKED(unit); - if (unit->inv_waitd_seq == 0xffffffff) { - gsec.gen = unit->inv_waitd_gen; - gsec.seq = unit->inv_waitd_seq; - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false); - dmar_qi_advance_tail(unit); - while (!dmar_qi_seq_processed(unit, &gsec)) - cpu_spinwait(); - unit->inv_waitd_gen++; - unit->inv_waitd_seq = 1; - } - seq = unit->inv_waitd_seq++; - pseq->gen = unit->inv_waitd_gen; - pseq->seq = seq; - if (emit_wait) { - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_descr(unit, seq, true, true, false); - } + memw ? unit->x86c.inv_waitd_seq_hw_phys : 0); } -/* - * To avoid missed wakeups, callers must increment the unit's waiters count - * before advancing the tail past the wait descriptor. - */ static void -dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq, - bool nowait) -{ - - DMAR_ASSERT_LOCKED(unit); - KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__)); - while (!dmar_qi_seq_processed(unit, gseq)) { - if (cold || nowait) { - cpu_spinwait(); - } else { - msleep(&unit->inv_seq_waiters, &unit->iommu.lock, 0, - "dmarse", hz); - } - } - unit->inv_seq_waiters--; -} - -static void -dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base, +dmar_qi_invalidate_emit(struct iommu_domain *idomain, iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait) { struct dmar_unit *unit; + struct dmar_domain *domain; iommu_gaddr_t isize; int am; + domain = __containerof(idomain, struct dmar_domain, iodom); unit = domain->dmar; DMAR_ASSERT_LOCKED(unit); for (; size > 0; base += isize, size -= isize) { am = calc_am(unit, base, size, &isize); - dmar_qi_ensure(unit, 1); + dmar_qi_ensure(DMAR2IOMMU(unit), 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR | DMAR_IQ_DESCR_IOTLB_DID(domain->domain), base | am); } - dmar_qi_emit_wait_seq(unit, pseq, emit_wait); -} - -/* - * The caller must not be using the entry's dmamap_link field. - */ -void -dmar_qi_invalidate_locked(struct dmar_domain *domain, - struct iommu_map_entry *entry, bool emit_wait) -{ - struct dmar_unit *unit; - - unit = domain->dmar; - DMAR_ASSERT_LOCKED(unit); - dmar_qi_invalidate_emit(domain, entry->start, entry->end - - entry->start, &entry->gseq, emit_wait); - - /* - * To avoid a data race in dmar_qi_task(), the entry's gseq must be - * initialized before the entry is added to the TLB flush list, and the - * entry must be added to that list before the tail is advanced. More - * precisely, the tail must not be advanced past the wait descriptor - * that will generate the interrupt that schedules dmar_qi_task() for - * execution before the entry is added to the list. While an earlier - * call to dmar_qi_ensure() might have advanced the tail, it will not - * advance it past the wait descriptor. - * - * See the definition of struct dmar_unit for more information on - * synchronization. 
- */ - entry->tlb_flush_next = NULL; - atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next, - (uintptr_t)entry); - unit->tlb_flush_tail = entry; - - dmar_qi_advance_tail(unit); -} - -void -dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base, - iommu_gaddr_t size, bool cansleep) -{ - struct dmar_unit *unit; - struct iommu_qi_genseq gseq; - - unit = domain->dmar; - DMAR_LOCK(unit); - dmar_qi_invalidate_emit(domain, base, size, &gseq, true); - - /* - * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count - * must be incremented before the tail is advanced. - */ - unit->inv_seq_waiters++; - - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, !cansleep); - DMAR_UNLOCK(unit); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), pseq, emit_wait); } void @@ -310,13 +201,13 @@ dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit) struct iommu_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); - dmar_qi_ensure(unit, 2); + dmar_qi_ensure(DMAR2IOMMU(unit), 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0); - dmar_qi_emit_wait_seq(unit, &gseq, true); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true); /* See dmar_qi_invalidate_sync(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); + unit->x86c.inv_seq_waiters++; + dmar_qi_advance_tail(DMAR2IOMMU(unit)); + iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false); } void @@ -325,14 +216,14 @@ dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit) struct iommu_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); - dmar_qi_ensure(unit, 2); + dmar_qi_ensure(DMAR2IOMMU(unit), 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB | DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0); - dmar_qi_emit_wait_seq(unit, &gseq, true); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true); /* See dmar_qi_invalidate_sync(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); + unit->x86c.inv_seq_waiters++; + dmar_qi_advance_tail(DMAR2IOMMU(unit)); + iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false); } void @@ -341,13 +232,13 @@ dmar_qi_invalidate_iec_glob(struct dmar_unit *unit) struct iommu_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); - dmar_qi_ensure(unit, 2); + dmar_qi_ensure(DMAR2IOMMU(unit), 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0); - dmar_qi_emit_wait_seq(unit, &gseq, true); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true); /* See dmar_qi_invalidate_sync(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); + unit->x86c.inv_seq_waiters++; + dmar_qi_advance_tail(DMAR2IOMMU(unit)); + iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false); } void @@ -363,21 +254,21 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt) for (; cnt > 0; cnt -= c, start += c) { l = ffs(start | cnt) - 1; c = 1 << l; - dmar_qi_ensure(unit, 1); + dmar_qi_ensure(DMAR2IOMMU(unit), 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV | DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) | DMAR_IQ_DESCR_IEC_IM(l), 0); } - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_seq(unit, &gseq, true); + dmar_qi_ensure(DMAR2IOMMU(unit), 1); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true); /* - * Since dmar_qi_wait_for_seq() will not sleep, this increment's + * Since iommu_qi_wait_for_seq() will not sleep, this increment's * placement relative to advancing the tail doesn't matter. 
*/ - unit->inv_seq_waiters++; + unit->x86c.inv_seq_waiters++; - dmar_qi_advance_tail(unit); + dmar_qi_advance_tail(DMAR2IOMMU(unit)); /* * The caller of the function, in particular, @@ -394,7 +285,7 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt) * queue is processed, which includes requests possibly issued * before our request. */ - dmar_qi_wait_for_seq(unit, &gseq, true); + iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, true); } int @@ -405,38 +296,18 @@ dmar_qi_intr(void *arg) unit = arg; KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->iommu.unit)); - taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task); + taskqueue_enqueue(unit->x86c.qi_taskqueue, &unit->x86c.qi_task); return (FILTER_HANDLED); } -static void -dmar_qi_drain_tlb_flush(struct dmar_unit *unit) -{ - struct iommu_map_entry *entry, *head; - - for (head = unit->tlb_flush_head;; head = entry) { - entry = (struct iommu_map_entry *) - atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next); - if (entry == NULL || - !dmar_qi_seq_processed(unit, &entry->gseq)) - break; - unit->tlb_flush_head = entry; - iommu_gas_free_entry(head); - if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) - iommu_gas_free_region(entry); - else - iommu_gas_free_space(entry); - } -} - static void dmar_qi_task(void *arg, int pending __unused) { struct dmar_unit *unit; uint32_t ics; - unit = arg; - dmar_qi_drain_tlb_flush(unit); + unit = IOMMU2DMAR(arg); + iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit)); /* * Request an interrupt on the completion of the next invalidation @@ -453,16 +324,16 @@ dmar_qi_task(void *arg, int pending __unused) * Otherwise, such entries will linger until a later entry * that requests an interrupt is processed. */ - dmar_qi_drain_tlb_flush(unit); + iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit)); } - if (unit->inv_seq_waiters > 0) { + if (unit->x86c.inv_seq_waiters > 0) { /* * Acquire the DMAR lock so that wakeup() is called only after * the waiter is sleeping. */ DMAR_LOCK(unit); - wakeup(&unit->inv_seq_waiters); + wakeup(&unit->x86c.inv_seq_waiters); DMAR_UNLOCK(unit); } } @@ -472,7 +343,7 @@ dmar_init_qi(struct dmar_unit *unit) { uint64_t iqa; uint32_t ics; - int qi_sz; + u_int qi_sz; if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0) return (0); @@ -481,34 +352,19 @@ dmar_init_qi(struct dmar_unit *unit) if (!unit->qi_enabled) return (0); - unit->tlb_flush_head = unit->tlb_flush_tail = - iommu_gas_alloc_entry(NULL, 0); - TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit); - unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK, - taskqueue_thread_enqueue, &unit->qi_taskqueue); - taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV, - "dmar%d qi taskq", unit->iommu.unit); - - unit->inv_waitd_gen = 0; - unit->inv_waitd_seq = 1; - - qi_sz = DMAR_IQA_QS_DEF; - TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz); - if (qi_sz > DMAR_IQA_QS_MAX) - qi_sz = DMAR_IQA_QS_MAX; - unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; - /* Reserve one descriptor to prevent wraparound. */ - unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ; - - /* The invalidation queue reads by DMARs are always coherent. 
*/ - unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK | - M_ZERO, 0, iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); - unit->inv_waitd_seq_hw_phys = pmap_kextract( - (vm_offset_t)&unit->inv_waitd_seq_hw); + unit->x86c.qi_buf_maxsz = DMAR_IQA_QS_MAX; + unit->x86c.qi_cmd_sz = DMAR_IQ_DESCR_SZ; + iommu_qi_common_init(DMAR2IOMMU(unit), dmar_qi_task); + get_x86_iommu()->qi_ensure = dmar_qi_ensure; + get_x86_iommu()->qi_emit_wait_descr = dmar_qi_emit_wait_descr; + get_x86_iommu()->qi_advance_tail = dmar_qi_advance_tail; + get_x86_iommu()->qi_invalidate_emit = dmar_qi_invalidate_emit; + + qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE); DMAR_LOCK(unit); dmar_write8(unit, DMAR_IQT_REG, 0); - iqa = pmap_kextract((uintptr_t)unit->inv_queue); + iqa = pmap_kextract((uintptr_t)unit->x86c.inv_queue); iqa |= qi_sz; dmar_write8(unit, DMAR_IQA_REG, iqa); dmar_enable_qi(unit); @@ -523,35 +379,19 @@ dmar_init_qi(struct dmar_unit *unit) return (0); } +static void +dmar_fini_qi_helper(struct iommu_unit *iommu) +{ + dmar_disable_qi_intr(IOMMU2DMAR(iommu)); + dmar_disable_qi(IOMMU2DMAR(iommu)); +} + void dmar_fini_qi(struct dmar_unit *unit) { - struct iommu_qi_genseq gseq; - if (!unit->qi_enabled) return; - taskqueue_drain(unit->qi_taskqueue, &unit->qi_task); - taskqueue_free(unit->qi_taskqueue); - unit->qi_taskqueue = NULL; - - DMAR_LOCK(unit); - /* quisce */ - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_seq(unit, &gseq, true); - /* See dmar_qi_invalidate_sync_locked(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); - /* only after the quisce, disable queue */ - dmar_disable_qi_intr(unit); - dmar_disable_qi(unit); - KASSERT(unit->inv_seq_waiters == 0, - ("dmar%d: waiters on disabled queue", unit->iommu.unit)); - DMAR_UNLOCK(unit); - - kmem_free(unit->inv_queue, unit->inv_queue_size); - unit->inv_queue = NULL; - unit->inv_queue_size = 0; + iommu_qi_common_fini(DMAR2IOMMU(unit), dmar_fini_qi_helper); unit->qi_enabled = 0; } diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c index ea2c0358e072..571e5a2e65cd 100644 --- a/sys/x86/iommu/iommu_utils.c +++ b/sys/x86/iommu/iommu_utils.c @@ -29,7 +29,9 @@ */ #include <sys/systm.h> +#include <sys/kernel.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/memdesc.h> #include <sys/mutex.h> #include <sys/sf_buf.h> @@ -40,8 +42,11 @@ #include <sys/taskqueue.h> #include <sys/tree.h> #include <vm/vm.h> -#include <vm/vm_page.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> #include <vm/vm_object.h> +#include <vm/vm_page.h> #include <dev/pci/pcireg.h> #include <machine/atomic.h> #include <machine/bus.h> @@ -251,3 +256,230 @@ iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) { return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie)); } + +#define IOMMU2X86C(iommu) (x86_iommu->get_x86_common(iommu)) + +static bool +iommu_qi_seq_processed(struct iommu_unit *unit, + const struct iommu_qi_genseq *pseq) +{ + struct x86_unit_common *x86c; + u_int gen; + + x86c = IOMMU2X86C(unit); + gen = x86c->inv_waitd_gen; + return (pseq->gen < gen || + (pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw)); +} + +void +iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq, + bool emit_wait) +{ + struct x86_unit_common *x86c; + struct iommu_qi_genseq gsec; + uint32_t seq; + + KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); + IOMMU_ASSERT_LOCKED(unit); + x86c = IOMMU2X86C(unit); + + if (x86c->inv_waitd_seq == 
0xffffffff) { + gsec.gen = x86c->inv_waitd_gen; + gsec.seq = x86c->inv_waitd_seq; + x86_iommu->qi_ensure(unit, 1); + x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false, + true, false); + x86_iommu->qi_advance_tail(unit); + while (!iommu_qi_seq_processed(unit, &gsec)) + cpu_spinwait(); + x86c->inv_waitd_gen++; + x86c->inv_waitd_seq = 1; + } + seq = x86c->inv_waitd_seq++; + pseq->gen = x86c->inv_waitd_gen; + pseq->seq = seq; + if (emit_wait) { + x86_iommu->qi_ensure(unit, 1); + x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false); + } +} + +/* + * To avoid missed wakeups, callers must increment the unit's waiters count + * before advancing the tail past the wait descriptor. + */ +void +iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq * + gseq, bool nowait) +{ + struct x86_unit_common *x86c; + + IOMMU_ASSERT_LOCKED(unit); + x86c = IOMMU2X86C(unit); + + KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__)); + while (!iommu_qi_seq_processed(unit, gseq)) { + if (cold || nowait) { + cpu_spinwait(); + } else { + msleep(&x86c->inv_seq_waiters, &unit->lock, 0, + "dmarse", hz); + } + } + x86c->inv_seq_waiters--; +} + +/* + * The caller must not be using the entry's dmamap_link field. + */ +void +iommu_qi_invalidate_locked(struct iommu_domain *domain, + struct iommu_map_entry *entry, bool emit_wait) +{ + struct iommu_unit *unit; + struct x86_unit_common *x86c; + + unit = domain->iommu; + x86c = IOMMU2X86C(unit); + IOMMU_ASSERT_LOCKED(unit); + + x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end - + entry->start, &entry->gseq, emit_wait); + + /* + * To avoid a data race in dmar_qi_task(), the entry's gseq must be + * initialized before the entry is added to the TLB flush list, and the + * entry must be added to that list before the tail is advanced. More + * precisely, the tail must not be advanced past the wait descriptor + * that will generate the interrupt that schedules dmar_qi_task() for + * execution before the entry is added to the list. While an earlier + * call to dmar_qi_ensure() might have advanced the tail, it will not + * advance it past the wait descriptor. + * + * See the definition of struct dmar_unit for more information on + * synchronization. + */ + entry->tlb_flush_next = NULL; + atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail-> + tlb_flush_next, (uintptr_t)entry); + x86c->tlb_flush_tail = entry; + + x86_iommu->qi_advance_tail(unit); +} + +void +iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base, + iommu_gaddr_t size, bool cansleep) +{ + struct iommu_unit *unit; + struct iommu_qi_genseq gseq; + + unit = domain->iommu; + IOMMU_LOCK(unit); + x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true); + + /* + * To avoid a missed wakeup in iommu_qi_task(), the unit's + * waiters count must be incremented before the tail is + * advanced. 
+ */ + IOMMU2X86C(unit)->inv_seq_waiters++; + + x86_iommu->qi_advance_tail(unit); + iommu_qi_wait_for_seq(unit, &gseq, !cansleep); + IOMMU_UNLOCK(unit); +} + +void +iommu_qi_drain_tlb_flush(struct iommu_unit *unit) +{ + struct x86_unit_common *x86c; + struct iommu_map_entry *entry, *head; + + x86c = IOMMU2X86C(unit); + for (head = x86c->tlb_flush_head;; head = entry) { + entry = (struct iommu_map_entry *) + atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next); + if (entry == NULL || + !iommu_qi_seq_processed(unit, &entry->gseq)) + break; + x86c->tlb_flush_head = entry; + iommu_gas_free_entry(head); + if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) + iommu_gas_free_region(entry); + else + iommu_gas_free_space(entry); + } +} + +void +iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task) +{ + struct x86_unit_common *x86c; + u_int qi_sz; + + x86c = IOMMU2X86C(unit); + + x86c->tlb_flush_head = x86c->tlb_flush_tail = + iommu_gas_alloc_entry(NULL, 0); + TASK_INIT(&x86c->qi_task, 0, qi_task, unit); + x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK, + taskqueue_thread_enqueue, &x86c->qi_taskqueue); + taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV, + "iommu%d qi taskq", unit->unit); + + x86c->inv_waitd_gen = 0; + x86c->inv_waitd_seq = 1; + + qi_sz = 3; + TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz); + if (qi_sz > x86c->qi_buf_maxsz) + qi_sz = x86c->qi_buf_maxsz; + x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; + /* Reserve one descriptor to prevent wraparound. */ + x86c->inv_queue_avail = x86c->inv_queue_size - + x86c->qi_cmd_sz; + + /* + * The invalidation queue reads by DMARs/AMDIOMMUs are always + * coherent. + */ + x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size, + M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0, + VM_MEMATTR_DEFAULT); + x86c->inv_waitd_seq_hw_phys = pmap_kextract( + (vm_offset_t)&x86c->inv_waitd_seq_hw); +} + +void +iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)( + struct iommu_unit *)) +{ + struct x86_unit_common *x86c; + struct iommu_qi_genseq gseq; + + x86c = IOMMU2X86C(unit); + + taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task); + taskqueue_free(x86c->qi_taskqueue); + x86c->qi_taskqueue = NULL; + + IOMMU_LOCK(unit); + /* quisce */ + x86_iommu->qi_ensure(unit, 1); + iommu_qi_emit_wait_seq(unit, &gseq, true); + /* See iommu_qi_invalidate_locked(). 
*/ + x86c->inv_seq_waiters++; + x86_iommu->qi_advance_tail(unit); + iommu_qi_wait_for_seq(unit, &gseq, false); + /* only after the quisce, disable queue */ + disable_qi(unit); + KASSERT(x86c->inv_seq_waiters == 0, + ("iommu%d: waiters on disabled queue", unit->unit)); + IOMMU_UNLOCK(unit); + + kmem_free(x86c->inv_queue, x86c->inv_queue_size); + x86c->inv_queue = NULL; + x86c->inv_queue_size = 0; +} diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h index 8c908964acd0..eb1bbafbeb77 100644 --- a/sys/x86/iommu/x86_iommu.h +++ b/sys/x86/iommu/x86_iommu.h @@ -59,7 +59,18 @@ extern int iommu_tbl_pagecnt; SYSCTL_DECL(_hw_iommu); SYSCTL_DECL(_hw_iommu_dmar); +struct x86_unit_common; + struct x86_iommu { + struct x86_unit_common *(*get_x86_common)(struct + iommu_unit *iommu); + void (*qi_ensure)(struct iommu_unit *unit, int descr_count); + void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq, + bool, bool, bool); + void (*qi_advance_tail)(struct iommu_unit *unit); + void (*qi_invalidate_emit)(struct iommu_domain *idomain, + iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq * + pseq, bool emit_wait); void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free, bool cansleep); void (*domain_unload)(struct iommu_domain *iodom, @@ -82,4 +93,65 @@ struct x86_iommu { void set_x86_iommu(struct x86_iommu *); struct x86_iommu *get_x86_iommu(void); +struct x86_unit_common { + uint32_t qi_buf_maxsz; + uint32_t qi_cmd_sz; + + char *inv_queue; + vm_size_t inv_queue_size; + uint32_t inv_queue_avail; + uint32_t inv_queue_tail; + volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait + descr completion */ *** 52 LINES SKIPPED ***
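The comment moved into the common code describes the delayed-free TLB flush list as a FIFO with a single dequeuer (the unit's qi task) and lock-serialized enqueuers, where the head always points at a "zombie" node. The following is a minimal standalone userspace sketch of that scheme, not the kernel code: the names (struct qent, enqueue, dequeue) are illustrative, a pthread mutex stands in for the IOMMU unit lock, and C11 atomics stand in for atomic_store_rel_ptr()/atomic_load_acq_ptr().

/*
 * Illustration of the single-dequeuer FIFO with a "zombie" head node,
 * as described in the comment this commit moves from struct dmar_unit
 * into the common x86 IOMMU code.  Userspace-only sketch.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct qent {
	_Atomic(struct qent *) next;
	int payload;
};

struct queue {
	struct qent *head;	/* the zombie; written only by the dequeuer */
	struct qent *tail;	/* written only by lock-holding enqueuers */
	pthread_mutex_t lock;	/* serializes enqueuers, like the unit lock */
};

static void
queue_init(struct queue *q)
{
	/* head == tail == zombie when empty; neither is ever NULL. */
	q->head = q->tail = calloc(1, sizeof(struct qent));
	pthread_mutex_init(&q->lock, NULL);
}

static void
enqueue(struct queue *q, struct qent *e)
{
	atomic_store_explicit(&e->next, NULL, memory_order_relaxed);
	pthread_mutex_lock(&q->lock);
	/* Release store publishes the fully initialized entry. */
	atomic_store_explicit(&q->tail->next, e, memory_order_release);
	q->tail = e;
	pthread_mutex_unlock(&q->lock);
}

static struct qent *
dequeue(struct queue *q)
{
	struct qent *zombie, *e;

	/* Single dequeuer: no lock needed, and q->tail is never touched. */
	zombie = q->head;
	e = atomic_load_explicit(&zombie->next, memory_order_acquire);
	if (e == NULL)
		return (NULL);	/* empty: tail still points at the zombie */
	q->head = e;		/* dequeued node becomes the new zombie */
	free(zombie);		/* the old zombie is finally freed */
	return (e);		/* this node is freed on the next dequeue */
}

int
main(void)
{
	struct queue q;
	struct qent *e;

	queue_init(&q);
	e = calloc(1, sizeof(*e));
	e->payload = 42;
	enqueue(&q, e);
	e = dequeue(&q);
	return (e != NULL && e->payload == 42 ? 0 : 1);
}

The kernel version differs in that the dequeuer (iommu_qi_drain_tlb_flush()) also checks iommu_qi_seq_processed() on each entry before consuming it, and the enqueuer (iommu_qi_invalidate_locked()) must append the entry before advancing the hardware queue tail, for the data-race reasons spelled out in its comment.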
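Similarly, the wait-descriptor completion test moved into iommu_qi_seq_processed() pairs a 32-bit sequence number with a generation counter so that sequence wraparound is not mistaken for completion. A self-contained sketch of that check follows; the names are illustrative, and the kernel additionally drains the queue before switching generations in iommu_qi_emit_wait_seq().

/*
 * Sketch of the generation/sequence completion check used by the
 * common QI wait-descriptor code.
 */
#include <stdbool.h>
#include <stdint.h>

struct genseq {
	uint32_t gen;	/* bumped each time seq wraps back to 1 */
	uint32_t seq;
};

bool
seq_processed(const struct genseq *want, uint32_t cur_gen, uint32_t seq_hw)
{
	/*
	 * Complete if the descriptor belongs to an older generation, or
	 * to the current one and the hardware-written sequence caught up.
	 */
	return (want->gen < cur_gen ||
	    (want->gen == cur_gen && want->seq <= seq_hw));
}

void
next_wait_seq(uint32_t *genp, uint32_t *seqp, struct genseq *out)
{
	if (*seqp == 0xffffffff) {	/* about to wrap: new generation */
		(*genp)++;
		*seqp = 1;
	}
	out->gen = *genp;
	out->seq = (*seqp)++;
}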