git: ced1133739d4 - stable/14 - bnxt_en: Thor2 Specific Doorbell related changes
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 27 Apr 2025 22:06:13 UTC
The branch stable/14 has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=ced1133739d405f15719e4881d458877d207f28e commit ced1133739d405f15719e4881d458877d207f28e Author: Sreekanth Reddy <sreekanth.reddy@broadcom.com> AuthorDate: 2025-04-09 05:44:22 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2025-04-27 22:02:59 +0000 bnxt_en: Thor2 Specific Doorbell related changes Doorbell offset : For Thor controllers doorbell offset was always hardcoded to 0x10000 for PF devices, whereas for Thor2 controllers doorbell offset will be legacy_l2_db_size_kb value provided by firmware through hwrm_func_qcfg command. CQ Toggle & Epoch bits support : In order to handle out of order doorbell handling as part of Dropped Doorbell Recovery, HW expects two changes in the driver in the data path. - First change is the epoch bit changes while updating the producer indexes of Tx. This epoch bit is toggled by the driver, each time the queue is wrapped for that specific doorbell. - The second change is to add a toggle bit pair to each ARM type doorbell. This includes the CQ_ARMALL, CQ_ARMSE, CQ_ARMENA doorbells. The toggle bit pair in context is incremented by the chip each time a new NQE completion is generated by the chip. To keep the driver in-sync, the toggle bit pair will be passed in the NQE to the host completion. This will be the toggle bit pair value that the host must use to set up the next NQE operation. The driver will pass that latest toggle bit pair value into the ARM type doorbells it generates to the chip. The doorbell clients will compare the toggle bit pair in each doorbell with the value in context. If the values match, the doorbell will be honored. If the values do not match, the doorbell will be discarded. 
MFC-After: 3 days Differential-Revision: https://reviews.freebsd.org/D49730 (cherry picked from commit 39c0b8b7994b0d339bffb0b17291c4a2b14cae3a) --- sys/dev/bnxt/bnxt_en/bnxt.h | 88 ++++++++++++----- sys/dev/bnxt/bnxt_en/bnxt_txrx.c | 52 +++++++++- sys/dev/bnxt/bnxt_en/if_bnxt.c | 203 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 311 insertions(+), 32 deletions(-) diff --git a/sys/dev/bnxt/bnxt_en/bnxt.h b/sys/dev/bnxt/bnxt_en/bnxt.h index eff1976a7954..0ba7b5723b91 100644 --- a/sys/dev/bnxt/bnxt_en/bnxt.h +++ b/sys/dev/bnxt/bnxt_en/bnxt.h @@ -190,32 +190,36 @@ #define BNXT_NO_MORE_WOL_FILTERS 0xFFFF #define bnxt_wol_supported(softc) (!((softc)->flags & BNXT_FLAG_VF) && \ ((softc)->flags & BNXT_FLAG_WOL_CAP )) - /* 64-bit doorbell */ -#define DBR_INDEX_MASK 0x0000000000ffffffULL -#define DBR_PI_LO_MASK 0xff000000UL -#define DBR_PI_LO_SFT 24 -#define DBR_XID_MASK 0x000fffff00000000ULL -#define DBR_XID_SFT 32 -#define DBR_PI_HI_MASK 0xf0000000000000ULL -#define DBR_PI_HI_SFT 52 -#define DBR_PATH_L2 (0x1ULL << 56) -#define DBR_VALID (0x1ULL << 58) -#define DBR_TYPE_SQ (0x0ULL << 60) -#define DBR_TYPE_RQ (0x1ULL << 60) -#define DBR_TYPE_SRQ (0x2ULL << 60) -#define DBR_TYPE_SRQ_ARM (0x3ULL << 60) -#define DBR_TYPE_CQ (0x4ULL << 60) -#define DBR_TYPE_CQ_ARMSE (0x5ULL << 60) -#define DBR_TYPE_CQ_ARMALL (0x6ULL << 60) -#define DBR_TYPE_CQ_ARMENA (0x7ULL << 60) -#define DBR_TYPE_SRQ_ARMENA (0x8ULL << 60) -#define DBR_TYPE_CQ_CUTOFF_ACK (0x9ULL << 60) -#define DBR_TYPE_NQ (0xaULL << 60) -#define DBR_TYPE_NQ_ARM (0xbULL << 60) -#define DBR_TYPE_PUSH_START (0xcULL << 60) -#define DBR_TYPE_PUSH_END (0xdULL << 60) -#define DBR_TYPE_NULL (0xfULL << 60) +#define DBR_INDEX_MASK 0x0000000000ffffffULL +#define DBR_PI_LO_MASK 0xff000000UL +#define DBR_PI_LO_SFT 24 +#define DBR_EPOCH_MASK 0x01000000UL +#define DBR_EPOCH_SFT 24 +#define DBR_TOGGLE_MASK 0x06000000UL +#define DBR_TOGGLE_SFT 25 +#define DBR_XID_MASK 0x000fffff00000000ULL +#define DBR_XID_SFT 32 +#define 
DBR_PI_HI_MASK 0xf0000000000000ULL +#define DBR_PI_HI_SFT 52 +#define DBR_PATH_L2 (0x1ULL << 56) +#define DBR_VALID (0x1ULL << 58) +#define DBR_TYPE_SQ (0x0ULL << 60) +#define DBR_TYPE_RQ (0x1ULL << 60) +#define DBR_TYPE_SRQ (0x2ULL << 60) +#define DBR_TYPE_SRQ_ARM (0x3ULL << 60) +#define DBR_TYPE_CQ (0x4ULL << 60) +#define DBR_TYPE_CQ_ARMSE (0x5ULL << 60) +#define DBR_TYPE_CQ_ARMALL (0x6ULL << 60) +#define DBR_TYPE_CQ_ARMENA (0x7ULL << 60) +#define DBR_TYPE_SRQ_ARMENA (0x8ULL << 60) +#define DBR_TYPE_CQ_CUTOFF_ACK (0x9ULL << 60) +#define DBR_TYPE_NQ (0xaULL << 60) +#define DBR_TYPE_NQ_ARM (0xbULL << 60) +#define DBR_TYPE_PUSH_START (0xcULL << 60) +#define DBR_TYPE_PUSH_END (0xdULL << 60) +#define DBR_TYPE_NQ_MASK (0xeULL << 60) +#define DBR_TYPE_NULL (0xfULL << 60) #define BNXT_MAX_L2_QUEUES 128 #define BNXT_ROCE_IRQ_COUNT 9 @@ -582,6 +586,8 @@ struct bnxt_grp_info { uint16_t ag_ring_id; }; +#define EPOCH_ARR_SZ 4096 + struct bnxt_ring { uint64_t paddr; vm_offset_t doorbell; @@ -592,12 +598,24 @@ struct bnxt_ring { uint16_t phys_id; uint16_t idx; struct bnxt_full_tpa_start *tpa_start; + union { + u64 db_key64; + u32 db_key32; + }; + uint32_t db_ring_mask; + uint32_t db_epoch_mask; + uint8_t db_epoch_shift; + + uint64_t epoch_arr[EPOCH_ARR_SZ]; + bool epoch_bit; + }; struct bnxt_cp_ring { struct bnxt_ring ring; struct if_irq irq; uint32_t cons; + uint32_t raw_cons; bool v_bit; /* Value of valid bit */ struct ctx_hw_stats *stats; uint32_t stats_ctx_id; @@ -605,6 +623,10 @@ struct bnxt_cp_ring { * set to the last read pidx */ uint64_t int_count; + uint8_t toggle; + uint8_t type; +#define Q_TYPE_TX 1 +#define Q_TYPE_RX 2 }; struct bnxt_full_tpa_start { @@ -1005,6 +1027,22 @@ struct bnxt_fw_health { #define BNXT_GRC_BASE_MASK 0xfffff000 #define BNXT_GRC_OFFSET_MASK 0x00000ffc + +#define NQE_CN_TYPE(type) ((type) & NQ_CN_TYPE_MASK) +#define NQE_CN_TOGGLE(type) (((type) & NQ_CN_TOGGLE_MASK) >> \ + NQ_CN_TOGGLE_SFT) + +#define DB_EPOCH(ring, idx) (((idx) & 
(ring)->db_epoch_mask) << \ + ((ring)->db_epoch_shift)) + +#define DB_TOGGLE(tgl) ((tgl) << DBR_TOGGLE_SFT) + +#define DB_RING_IDX_CMP(ring, idx) (((idx) & (ring)->db_ring_mask) | \ + DB_EPOCH(ring, idx)) + +#define DB_RING_IDX(ring, idx, bit) (((idx) & (ring)->db_ring_mask) | \ + ((bit) << (24))) + struct bnxt_softc { device_t dev; if_ctx_t ctx; diff --git a/sys/dev/bnxt/bnxt_en/bnxt_txrx.c b/sys/dev/bnxt/bnxt_en/bnxt_txrx.c index 733db2902a5c..8b2ff6238367 100644 --- a/sys/dev/bnxt/bnxt_en/bnxt_txrx.c +++ b/sys/dev/bnxt/bnxt_en/bnxt_txrx.c @@ -98,6 +98,7 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi) uint16_t lflags; uint32_t cfa_meta; int seg = 0; + uint8_t wrap = 0; /* If we have offloads enabled, we need to use two BDs. */ if ((pi->ipi_csum_flags & (CSUM_OFFLOAD | CSUM_TSO | CSUM_IP)) || @@ -124,7 +125,18 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi) if (need_hi) { flags_type |= TX_BD_LONG_TYPE_TX_BD_LONG; + /* Handle wrapping */ + if (pi->ipi_new_pidx == txr->ring_size - 1) + wrap = 1; + pi->ipi_new_pidx = RING_NEXT(txr, pi->ipi_new_pidx); + + /* Toggle epoch bit on wrap */ + if (wrap && pi->ipi_new_pidx == 0) + txr->epoch_bit = !txr->epoch_bit; + if (pi->ipi_new_pidx < EPOCH_ARR_SZ) + txr->epoch_arr[pi->ipi_new_pidx] = txr->epoch_bit; + tbdh = &((struct tx_bd_long_hi *)txr->vaddr)[pi->ipi_new_pidx]; tbdh->kid_or_ts_high_mss = htole16(pi->ipi_tso_segsz); tbdh->kid_or_ts_low_hdr_size = htole16((pi->ipi_ehdrlen + pi->ipi_ip_hlen + @@ -158,7 +170,15 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi) for (; seg < pi->ipi_nsegs; seg++) { tbd->flags_type = htole16(flags_type); + + if (pi->ipi_new_pidx == txr->ring_size - 1) + wrap = 1; pi->ipi_new_pidx = RING_NEXT(txr, pi->ipi_new_pidx); + if (wrap && pi->ipi_new_pidx == 0) + txr->epoch_bit = !txr->epoch_bit; + if (pi->ipi_new_pidx < EPOCH_ARR_SZ) + txr->epoch_arr[pi->ipi_new_pidx] = txr->epoch_bit; + tbd = &((struct tx_bd_long *)txr->vaddr)[pi->ipi_new_pidx]; tbd->len = htole16(pi->ipi_segs[seg].ds_len); 
tbd->addr = htole64(pi->ipi_segs[seg].ds_addr); @@ -166,7 +186,13 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi) } flags_type |= TX_BD_SHORT_FLAGS_PACKET_END; tbd->flags_type = htole16(flags_type); + if (pi->ipi_new_pidx == txr->ring_size - 1) + wrap = 1; pi->ipi_new_pidx = RING_NEXT(txr, pi->ipi_new_pidx); + if (wrap && pi->ipi_new_pidx == 0) + txr->epoch_bit = !txr->epoch_bit; + if (pi->ipi_new_pidx < EPOCH_ARR_SZ) + txr->epoch_arr[pi->ipi_new_pidx] = txr->epoch_bit; return 0; } @@ -190,16 +216,21 @@ bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear) struct tx_cmpl *cmpl = (struct tx_cmpl *)cpr->ring.vaddr; int avail = 0; uint32_t cons = cpr->cons; + uint32_t raw_cons = cpr->raw_cons; bool v_bit = cpr->v_bit; bool last_v_bit; uint32_t last_cons; + uint32_t last_raw_cons; uint16_t type; uint16_t err; for (;;) { last_cons = cons; + last_raw_cons = raw_cons; last_v_bit = v_bit; + NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); + raw_cons++; CMPL_PREFETCH_NEXT(cpr, cons); if (!CMP_VALID(&cmpl[cons], v_bit)) @@ -227,8 +258,10 @@ bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear) default: if (type & 1) { NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); - if (!CMP_VALID(&cmpl[cons], v_bit)) + raw_cons++; + if (!CMP_VALID(&cmpl[cons], v_bit)) { goto done; + } } device_printf(softc->dev, "Unhandled TX completion type %u\n", type); @@ -239,6 +272,7 @@ done: if (clear && avail) { cpr->cons = last_cons; + cpr->raw_cons = last_raw_cons; cpr->v_bit = last_v_bit; softc->db_ops.bnxt_db_tx_cq(cpr, 0); } @@ -285,9 +319,16 @@ bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru) rxbd[pidx].opaque = (((rxqid & 0xff) << 24) | (flid << 16) | (frag_idxs[i])); rxbd[pidx].addr = htole64(paddrs[i]); - if (++pidx == rx_ring->ring_size) + + /* Increment pidx and handle wrap-around */ + if (++pidx == rx_ring->ring_size) { pidx = 0; + rx_ring->epoch_bit = !rx_ring->epoch_bit; + } + if (pidx < EPOCH_ARR_SZ) + rx_ring->epoch_arr[pidx] = rx_ring->epoch_bit; } + return; } @@ -472,6 
+513,7 @@ bnxt_pkt_get_l2(struct bnxt_softc *softc, if_rxd_info_t ri, /* Now the second 16-byte BD */ NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); + cpr->raw_cons++; ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); rcph = &((struct rx_pkt_cmpl_hi *)cpr->ring.vaddr)[cpr->cons]; @@ -503,6 +545,7 @@ bnxt_pkt_get_l2(struct bnxt_softc *softc, if_rxd_info_t ri, /* And finally the ag ring stuff. */ for (i=1; i < ri->iri_nfrags; i++) { NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); + cpr->raw_cons++; ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons]; @@ -553,6 +596,7 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri, /* Now the second 16-byte BD */ NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); + cpr->raw_cons++; ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); flags2 = le32toh(tpas->high.flags2); @@ -578,6 +622,7 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri, /* Now the ag ring stuff. 
*/ for (i=1; i < ri->iri_nfrags; i++) { NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); + cpr->raw_cons++; ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons]; @@ -614,6 +659,7 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri) for (;;) { NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); + cpr->raw_cons++; ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); CMPL_PREFETCH_NEXT(cpr, cpr->cons); cmp = &((struct cmpl_base *)cpr->ring.vaddr)[cpr->cons]; @@ -636,6 +682,7 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri) softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].low = *rtpa; NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); + cpr->raw_cons++; ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); CMPL_PREFETCH_NEXT(cpr, cpr->cons); @@ -649,6 +696,7 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri) if (type & 1) { NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); + cpr->raw_cons++; ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); CMPL_PREFETCH_NEXT(cpr, cpr->cons); diff --git a/sys/dev/bnxt/bnxt_en/if_bnxt.c b/sys/dev/bnxt/bnxt_en/if_bnxt.c index e00f59fd390e..8960866fcf90 100644 --- a/sys/dev/bnxt/bnxt_en/if_bnxt.c +++ b/sys/dev/bnxt/bnxt_en/if_bnxt.c @@ -428,6 +428,18 @@ bnxt_nq_free(struct bnxt_softc *softc) softc->nq_rings = NULL; } + +static void +bnxt_set_db_mask(struct bnxt_softc *bp, struct bnxt_ring *db, + u32 ring_type) +{ + if (BNXT_CHIP_P7(bp)) { + db->db_epoch_mask = db->db_ring_mask + 1; + db->db_epoch_shift = DBR_EPOCH_SFT - ilog2(db->db_epoch_mask); + + } +} + /* * Device Dependent Configuration Functions */ @@ -492,6 +504,8 @@ bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, softc->legacy_db_size: softc->tx_cp_rings[i].ring.id * 0x80; softc->tx_cp_rings[i].ring.ring_size = softc->scctx->isc_ntxd[0]; + softc->tx_cp_rings[i].ring.db_ring_mask = + softc->tx_cp_rings[i].ring.ring_size - 1; softc->tx_cp_rings[i].ring.vaddr = vaddrs[i * ntxqs]; softc->tx_cp_rings[i].ring.paddr = 
paddrs[i * ntxqs]; @@ -505,6 +519,7 @@ bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, softc->tx_rings[i].doorbell = (BNXT_CHIP_P5_PLUS(softc)) ? softc->legacy_db_size : softc->tx_rings[i].id * 0x80; softc->tx_rings[i].ring_size = softc->scctx->isc_ntxd[1]; + softc->tx_rings[i].db_ring_mask = softc->tx_rings[i].ring_size - 1; softc->tx_rings[i].vaddr = vaddrs[i * ntxqs + 1]; softc->tx_rings[i].paddr = paddrs[i * ntxqs + 1]; @@ -521,8 +536,10 @@ bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, softc->nq_rings[i].ring.doorbell = (BNXT_CHIP_P5_PLUS(softc)) ? softc->legacy_db_size : softc->nq_rings[i].ring.id * 0x80; softc->nq_rings[i].ring.ring_size = softc->scctx->isc_ntxd[2]; + softc->nq_rings[i].ring.db_ring_mask = softc->nq_rings[i].ring.ring_size - 1; softc->nq_rings[i].ring.vaddr = vaddrs[i * ntxqs + 2]; softc->nq_rings[i].ring.paddr = paddrs[i * ntxqs + 2]; + softc->nq_rings[i].type = Q_TYPE_TX; } } @@ -684,6 +701,8 @@ bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, */ softc->rx_cp_rings[i].ring.ring_size = softc->scctx->isc_nrxd[0]; + softc->rx_cp_rings[i].ring.db_ring_mask = + softc->rx_cp_rings[i].ring.ring_size - 1; softc->rx_cp_rings[i].ring.vaddr = vaddrs[i * nrxqs]; softc->rx_cp_rings[i].ring.paddr = paddrs[i * nrxqs]; @@ -696,6 +715,8 @@ bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, softc->rx_rings[i].doorbell = (BNXT_CHIP_P5_PLUS(softc)) ? softc->legacy_db_size : softc->rx_rings[i].id * 0x80; softc->rx_rings[i].ring_size = softc->scctx->isc_nrxd[1]; + softc->rx_rings[i].db_ring_mask = + softc->rx_rings[i].ring_size -1; softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1]; softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1]; @@ -717,6 +738,7 @@ bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, softc->ag_rings[i].doorbell = (BNXT_CHIP_P5_PLUS(softc)) ? 
softc->legacy_db_size : softc->ag_rings[i].id * 0x80; softc->ag_rings[i].ring_size = softc->scctx->isc_nrxd[2]; + softc->ag_rings[i].db_ring_mask = softc->ag_rings[i].ring_size - 1; softc->ag_rings[i].vaddr = vaddrs[i * nrxqs + 2]; softc->ag_rings[i].paddr = paddrs[i * nrxqs + 2]; @@ -1414,6 +1436,141 @@ static void bnxt_thor_db_nq(void *db_ptr, bool enable_irq) BUS_SPACE_BARRIER_WRITE); } +static void +bnxt_thor2_db_rx(void *db_ptr, uint16_t idx) +{ + struct bnxt_ring *ring = (struct bnxt_ring *) db_ptr; + struct bnxt_bar_info *db_bar = &ring->softc->doorbell_bar; + uint64_t db_val; + + if (idx >= ring->ring_size) { + device_printf(ring->softc->dev, "%s: BRCM DBG: idx: %d crossed boundary\n", __func__, idx); + return; + } + + db_val = ((DBR_PATH_L2 | DBR_TYPE_SRQ | DBR_VALID | idx) | + ((uint64_t)ring->phys_id << DBR_XID_SFT)); + + /* Add the PI index */ + db_val |= DB_RING_IDX(ring, idx, ring->epoch_arr[idx]); + + bus_space_barrier(db_bar->tag, db_bar->handle, ring->doorbell, 8, + BUS_SPACE_BARRIER_WRITE); + bus_space_write_8(db_bar->tag, db_bar->handle, ring->doorbell, + htole64(db_val)); +} + +static void +bnxt_thor2_db_tx(void *db_ptr, uint16_t idx) +{ + struct bnxt_ring *ring = (struct bnxt_ring *) db_ptr; + struct bnxt_bar_info *db_bar = &ring->softc->doorbell_bar; + uint64_t db_val; + + if (idx >= ring->ring_size) { + device_printf(ring->softc->dev, "%s: BRCM DBG: idx: %d crossed boundary\n", __func__, idx); + return; + } + + db_val = ((DBR_PATH_L2 | DBR_TYPE_SQ | DBR_VALID | idx) | + ((uint64_t)ring->phys_id << DBR_XID_SFT)); + + /* Add the PI index */ + db_val |= DB_RING_IDX(ring, idx, ring->epoch_arr[idx]); + + bus_space_barrier(db_bar->tag, db_bar->handle, ring->doorbell, 8, + BUS_SPACE_BARRIER_WRITE); + bus_space_write_8(db_bar->tag, db_bar->handle, ring->doorbell, + htole64(db_val)); +} + +static void +bnxt_thor2_db_rx_cq(void *db_ptr, bool enable_irq) +{ + struct bnxt_cp_ring *cpr = (struct bnxt_cp_ring *) db_ptr; + struct bnxt_bar_info *db_bar = 
&cpr->ring.softc->doorbell_bar; + u64 db_msg = { 0 }; + uint32_t cons = cpr->raw_cons; + uint32_t toggle = 0; + + if (cons == UINT32_MAX) + cons = 0; + + if (enable_irq == true) + toggle = cpr->toggle; + + db_msg = DBR_PATH_L2 | ((u64)cpr->ring.phys_id << DBR_XID_SFT) | DBR_VALID | + DB_RING_IDX_CMP(&cpr->ring, cons) | DB_TOGGLE(toggle); + + if (enable_irq) + db_msg |= DBR_TYPE_CQ_ARMALL; + else + db_msg |= DBR_TYPE_CQ; + + bus_space_barrier(db_bar->tag, db_bar->handle, cpr->ring.doorbell, 8, + BUS_SPACE_BARRIER_WRITE); + bus_space_write_8(db_bar->tag, db_bar->handle, cpr->ring.doorbell, + htole64(*(uint64_t *)&db_msg)); + bus_space_barrier(db_bar->tag, db_bar->handle, 0, db_bar->size, + BUS_SPACE_BARRIER_WRITE); +} + +static void +bnxt_thor2_db_tx_cq(void *db_ptr, bool enable_irq) +{ + struct bnxt_cp_ring *cpr = (struct bnxt_cp_ring *) db_ptr; + struct bnxt_bar_info *db_bar = &cpr->ring.softc->doorbell_bar; + u64 db_msg = { 0 }; + uint32_t cons = cpr->raw_cons; + uint32_t toggle = 0; + + if (enable_irq == true) + toggle = cpr->toggle; + + db_msg = DBR_PATH_L2 | ((u64)cpr->ring.phys_id << DBR_XID_SFT) | DBR_VALID | + DB_RING_IDX_CMP(&cpr->ring, cons) | DB_TOGGLE(toggle); + + if (enable_irq) + db_msg |= DBR_TYPE_CQ_ARMALL; + else + db_msg |= DBR_TYPE_CQ; + + bus_space_barrier(db_bar->tag, db_bar->handle, cpr->ring.doorbell, 8, + BUS_SPACE_BARRIER_WRITE); + bus_space_write_8(db_bar->tag, db_bar->handle, cpr->ring.doorbell, + htole64(*(uint64_t *)&db_msg)); + bus_space_barrier(db_bar->tag, db_bar->handle, 0, db_bar->size, + BUS_SPACE_BARRIER_WRITE); +} + +static void +bnxt_thor2_db_nq(void *db_ptr, bool enable_irq) +{ + struct bnxt_cp_ring *cpr = (struct bnxt_cp_ring *) db_ptr; + struct bnxt_bar_info *db_bar = &cpr->ring.softc->doorbell_bar; + u64 db_msg = { 0 }; + uint32_t cons = cpr->raw_cons; + uint32_t toggle = 0; + + if (enable_irq == true) + toggle = cpr->toggle; + + db_msg = DBR_PATH_L2 | ((u64)cpr->ring.phys_id << DBR_XID_SFT) | DBR_VALID | + 
DB_RING_IDX_CMP(&cpr->ring, cons) | DB_TOGGLE(toggle); + + if (enable_irq) + db_msg |= DBR_TYPE_NQ_ARM; + else + db_msg |= DBR_TYPE_NQ_MASK; + + bus_space_barrier(db_bar->tag, db_bar->handle, cpr->ring.doorbell, 8, + BUS_SPACE_BARRIER_WRITE); + bus_space_write_8(db_bar->tag, db_bar->handle, cpr->ring.doorbell, + htole64(*(uint64_t *)&db_msg)); + bus_space_barrier(db_bar->tag, db_bar->handle, 0, db_bar->size, + BUS_SPACE_BARRIER_WRITE); +} + struct bnxt_softc *bnxt_find_dev(uint32_t domain, uint32_t bus, uint32_t dev_fn, char *dev_name) { struct bnxt_softc_list *sc = NULL; @@ -2295,6 +2452,12 @@ bnxt_attach_pre(if_ctx_t ctx) softc->db_ops.bnxt_db_rx_cq = bnxt_thor_db_rx_cq; softc->db_ops.bnxt_db_tx_cq = bnxt_thor_db_tx_cq; softc->db_ops.bnxt_db_nq = bnxt_thor_db_nq; + } else if (BNXT_CHIP_P7(softc)) { + softc->db_ops.bnxt_db_tx = bnxt_thor2_db_tx; + softc->db_ops.bnxt_db_rx = bnxt_thor2_db_rx; + softc->db_ops.bnxt_db_rx_cq = bnxt_thor2_db_rx_cq; + softc->db_ops.bnxt_db_tx_cq = bnxt_thor2_db_tx_cq; + softc->db_ops.bnxt_db_nq = bnxt_thor2_db_nq; } else { softc->db_ops.bnxt_db_tx = bnxt_cuw_db_tx; softc->db_ops.bnxt_db_rx = bnxt_cuw_db_rx; @@ -2455,6 +2618,7 @@ bnxt_attach_pre(if_ctx_t ctx) softc->legacy_db_size : softc->def_cp_ring.ring.id * 0x80; softc->def_cp_ring.ring.ring_size = PAGE_SIZE / sizeof(struct cmpl_base); + softc->def_cp_ring.ring.db_ring_mask = softc->def_cp_ring.ring.ring_size -1 ; rc = iflib_dma_alloc(ctx, sizeof(struct cmpl_base) * softc->def_cp_ring.ring.ring_size, &softc->def_cp_ring_mem, 0); @@ -2872,6 +3036,8 @@ bnxt_init(if_ctx_t ctx) rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->def_cp_ring.ring); + bnxt_set_db_mask(softc, &softc->def_cp_ring.ring, + HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL); if (rc) goto fail; skip_def_cp_ring: @@ -2882,15 +3048,18 @@ skip_def_cp_ring: if (rc) goto fail; - if (BNXT_CHIP_P5(softc)) { + if (BNXT_CHIP_P5_PLUS(softc)) { /* Allocate the NQ */ softc->nq_rings[i].cons = 0; + 
softc->nq_rings[i].raw_cons = 0; softc->nq_rings[i].v_bit = 1; softc->nq_rings[i].last_idx = UINT32_MAX; bnxt_mark_cpr_invalid(&softc->nq_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ, &softc->nq_rings[i].ring); + bnxt_set_db_mask(softc, &softc->nq_rings[i].ring, + HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ); if (rc) goto fail; @@ -2898,21 +3067,27 @@ skip_def_cp_ring: } /* Allocate the completion ring */ softc->rx_cp_rings[i].cons = UINT32_MAX; + softc->rx_cp_rings[i].raw_cons = UINT32_MAX; softc->rx_cp_rings[i].v_bit = 1; softc->rx_cp_rings[i].last_idx = UINT32_MAX; + softc->rx_cp_rings[i].toggle = 0; bnxt_mark_cpr_invalid(&softc->rx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->rx_cp_rings[i].ring); + bnxt_set_db_mask(softc, &softc->rx_cp_rings[i].ring, + HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL); if (rc) goto fail; - if (BNXT_CHIP_P5(softc)) + if (BNXT_CHIP_P5_PLUS(softc)) softc->db_ops.bnxt_db_rx_cq(&softc->rx_cp_rings[i], 1); /* Allocate the RX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->rx_rings[i]); + bnxt_set_db_mask(softc, &softc->rx_rings[i], + HWRM_RING_ALLOC_INPUT_RING_TYPE_RX); if (rc) goto fail; softc->db_ops.bnxt_db_rx(&softc->rx_rings[i], 0); @@ -2921,6 +3096,8 @@ skip_def_cp_ring: rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX_AGG, &softc->ag_rings[i]); + bnxt_set_db_mask(softc, &softc->ag_rings[i], + HWRM_RING_ALLOC_INPUT_RING_TYPE_RX_AGG); if (rc) goto fail; softc->db_ops.bnxt_db_rx(&softc->ag_rings[i], 0); @@ -2983,21 +3160,27 @@ skip_def_cp_ring: /* Allocate the completion ring */ softc->tx_cp_rings[i].cons = UINT32_MAX; + softc->tx_cp_rings[i].raw_cons = UINT32_MAX; softc->tx_cp_rings[i].v_bit = 1; + softc->tx_cp_rings[i].toggle = 0; bnxt_mark_cpr_invalid(&softc->tx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->tx_cp_rings[i].ring); + bnxt_set_db_mask(softc, 
&softc->tx_cp_rings[i].ring, + HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL); if (rc) goto fail; - if (BNXT_CHIP_P5(softc)) + if (BNXT_CHIP_P5_PLUS(softc)) softc->db_ops.bnxt_db_tx_cq(&softc->tx_cp_rings[i], 1); /* Allocate the TX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_TX, &softc->tx_rings[i]); + bnxt_set_db_mask(softc, &softc->tx_rings[i], + HWRM_RING_ALLOC_INPUT_RING_TYPE_TX); if (rc) goto fail; softc->db_ops.bnxt_db_tx(&softc->tx_rings[i], 0); @@ -3568,25 +3751,35 @@ process_nq(struct bnxt_softc *softc, uint16_t nqid) { struct bnxt_cp_ring *cpr = &softc->nq_rings[nqid]; nq_cn_t *cmp = (nq_cn_t *) cpr->ring.vaddr; + struct bnxt_cp_ring *tx_cpr = &softc->tx_cp_rings[nqid]; + struct bnxt_cp_ring *rx_cpr = &softc->rx_cp_rings[nqid]; bool v_bit = cpr->v_bit; uint32_t cons = cpr->cons; + uint32_t raw_cons = cpr->raw_cons; uint16_t nq_type, nqe_cnt = 0; while (1) { - if (!NQ_VALID(&cmp[cons], v_bit)) + if (!NQ_VALID(&cmp[cons], v_bit)) { goto done; + } nq_type = NQ_CN_TYPE_MASK & cmp[cons].type; - if (nq_type != NQ_CN_TYPE_CQ_NOTIFICATION) + if (NQE_CN_TYPE(nq_type) != NQ_CN_TYPE_CQ_NOTIFICATION) { bnxt_process_async_msg(cpr, (tx_cmpl_t *)&cmp[cons]); + } else { + tx_cpr->toggle = NQE_CN_TOGGLE(cmp[cons].type); + rx_cpr->toggle = NQE_CN_TOGGLE(cmp[cons].type); + } NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); + raw_cons++; nqe_cnt++; } done: if (nqe_cnt) { cpr->cons = cons; + cpr->raw_cons = raw_cons; cpr->v_bit = v_bit; } }