git: ced1133739d4 - stable/14 - bnxt_en: Thor2 Specific Doorbell related changes

From: Warner Losh <imp_at_FreeBSD.org>
Date: Sun, 27 Apr 2025 22:06:13 UTC
The branch stable/14 has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=ced1133739d405f15719e4881d458877d207f28e

commit ced1133739d405f15719e4881d458877d207f28e
Author:     Sreekanth Reddy <sreekanth.reddy@broadcom.com>
AuthorDate: 2025-04-09 05:44:22 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2025-04-27 22:02:59 +0000

    bnxt_en: Thor2 Specific Doorbell related changes
    
    Doorbell offset :
    For Thor controllers the doorbell offset was always hardcoded to
    0x10000 for PF devices, whereas for Thor2 controllers the doorbell
    offset is the legacy_l2_db_size_kb value provided by firmware through
    the hwrm_func_qcfg command.
    
    CQ Toggle & Epoch bits support :
    To handle out-of-order doorbells as part of Dropped Doorbell Recovery,
    the HW expects two changes in the driver's data path.
    
    - The first change is an epoch bit set while updating the Tx producer
    indexes. The driver toggles this epoch bit each time the queue wraps
    for that specific doorbell.
    
    - The second change is to add a toggle bit pair to each ARM type
    doorbell.  This includes the CQ_ARMALL, CQ_ARMSE, CQ_ARMENA
    doorbells. The toggle bit pair in context is incremented by the chip
    each time a new NQE completion is generated by the chip. To keep the
    driver in-sync, the toggle bit pair will be passed in the NQE to the
    host completion. This will be the toggle bit pair value that the host
    must use to set up the next NQE operation. The driver will pass that
    latest toggle bit pair value into the ARM type doorbells it generates to
    the chip. The doorbell clients will compare the toggle bit pair in each
    doorbell with the value in context.  If the values match, the doorbell
    will be honored. If the values do not match, the doorbell will be
    discarded.
    
    MFC-After: 3 days
    Differential-Revision: https://reviews.freebsd.org/D49730
    (cherry picked from commit 39c0b8b7994b0d339bffb0b17291c4a2b14cae3a)
---
 sys/dev/bnxt/bnxt_en/bnxt.h      |  88 ++++++++++++-----
 sys/dev/bnxt/bnxt_en/bnxt_txrx.c |  52 +++++++++-
 sys/dev/bnxt/bnxt_en/if_bnxt.c   | 203 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 311 insertions(+), 32 deletions(-)

diff --git a/sys/dev/bnxt/bnxt_en/bnxt.h b/sys/dev/bnxt/bnxt_en/bnxt.h
index eff1976a7954..0ba7b5723b91 100644
--- a/sys/dev/bnxt/bnxt_en/bnxt.h
+++ b/sys/dev/bnxt/bnxt_en/bnxt.h
@@ -190,32 +190,36 @@
 #define BNXT_NO_MORE_WOL_FILTERS	0xFFFF
 #define bnxt_wol_supported(softc)	(!((softc)->flags & BNXT_FLAG_VF) && \
 					  ((softc)->flags & BNXT_FLAG_WOL_CAP ))
-
 /* 64-bit doorbell */
-#define DBR_INDEX_MASK                                  0x0000000000ffffffULL
-#define DBR_PI_LO_MASK                                  0xff000000UL
-#define DBR_PI_LO_SFT                                   24
-#define DBR_XID_MASK                                    0x000fffff00000000ULL
-#define DBR_XID_SFT                                     32
-#define DBR_PI_HI_MASK                                  0xf0000000000000ULL
-#define DBR_PI_HI_SFT                                   52
-#define DBR_PATH_L2                                     (0x1ULL << 56)
-#define DBR_VALID                                       (0x1ULL << 58)
-#define DBR_TYPE_SQ                                     (0x0ULL << 60)
-#define DBR_TYPE_RQ                                     (0x1ULL << 60)
-#define DBR_TYPE_SRQ                                    (0x2ULL << 60)
-#define DBR_TYPE_SRQ_ARM                                (0x3ULL << 60)
-#define DBR_TYPE_CQ                                     (0x4ULL << 60)
-#define DBR_TYPE_CQ_ARMSE                               (0x5ULL << 60)
-#define DBR_TYPE_CQ_ARMALL                              (0x6ULL << 60)
-#define DBR_TYPE_CQ_ARMENA                              (0x7ULL << 60)
-#define DBR_TYPE_SRQ_ARMENA                             (0x8ULL << 60)
-#define DBR_TYPE_CQ_CUTOFF_ACK                          (0x9ULL << 60)
-#define DBR_TYPE_NQ                                     (0xaULL << 60)
-#define DBR_TYPE_NQ_ARM                                 (0xbULL << 60)
-#define DBR_TYPE_PUSH_START                             (0xcULL << 60)
-#define DBR_TYPE_PUSH_END                               (0xdULL << 60)
-#define DBR_TYPE_NULL                                   (0xfULL << 60)
+#define DBR_INDEX_MASK					0x0000000000ffffffULL
+#define DBR_PI_LO_MASK					0xff000000UL
+#define DBR_PI_LO_SFT					24
+#define DBR_EPOCH_MASK					0x01000000UL
+#define DBR_EPOCH_SFT					24
+#define DBR_TOGGLE_MASK					0x06000000UL
+#define DBR_TOGGLE_SFT					25
+#define DBR_XID_MASK					0x000fffff00000000ULL
+#define DBR_XID_SFT					32
+#define DBR_PI_HI_MASK					0xf0000000000000ULL
+#define DBR_PI_HI_SFT					52
+#define DBR_PATH_L2					(0x1ULL << 56)
+#define DBR_VALID					(0x1ULL << 58)
+#define DBR_TYPE_SQ					(0x0ULL << 60)
+#define DBR_TYPE_RQ					(0x1ULL << 60)
+#define DBR_TYPE_SRQ					(0x2ULL << 60)
+#define DBR_TYPE_SRQ_ARM				(0x3ULL << 60)
+#define DBR_TYPE_CQ					(0x4ULL << 60)
+#define DBR_TYPE_CQ_ARMSE				(0x5ULL << 60)
+#define DBR_TYPE_CQ_ARMALL				(0x6ULL << 60)
+#define DBR_TYPE_CQ_ARMENA				(0x7ULL << 60)
+#define DBR_TYPE_SRQ_ARMENA				(0x8ULL << 60)
+#define DBR_TYPE_CQ_CUTOFF_ACK				(0x9ULL << 60)
+#define DBR_TYPE_NQ					(0xaULL << 60)
+#define DBR_TYPE_NQ_ARM					(0xbULL << 60)
+#define DBR_TYPE_PUSH_START				(0xcULL << 60)
+#define DBR_TYPE_PUSH_END				(0xdULL << 60)
+#define DBR_TYPE_NQ_MASK				(0xeULL << 60)
+#define DBR_TYPE_NULL					(0xfULL << 60)
 
 #define BNXT_MAX_L2_QUEUES				128
 #define BNXT_ROCE_IRQ_COUNT				9
@@ -582,6 +586,8 @@ struct bnxt_grp_info {
 	uint16_t	ag_ring_id;
 };
 
+#define	EPOCH_ARR_SZ	4096
+
 struct bnxt_ring {
 	uint64_t		paddr;
 	vm_offset_t		doorbell;
@@ -592,12 +598,24 @@ struct bnxt_ring {
 	uint16_t		phys_id;
 	uint16_t		idx;
 	struct bnxt_full_tpa_start *tpa_start;
+	union {
+		u64             db_key64;
+		u32             db_key32;
+	};
+	uint32_t                db_ring_mask;
+	uint32_t                db_epoch_mask;
+	uint8_t                 db_epoch_shift;
+
+	uint64_t		epoch_arr[EPOCH_ARR_SZ];
+	bool                    epoch_bit;
+
 };
 
 struct bnxt_cp_ring {
 	struct bnxt_ring	ring;
 	struct if_irq		irq;
 	uint32_t		cons;
+	uint32_t		raw_cons;
 	bool			v_bit;		/* Value of valid bit */
 	struct ctx_hw_stats	*stats;
 	uint32_t		stats_ctx_id;
@@ -605,6 +623,10 @@ struct bnxt_cp_ring {
 						 * set to the last read pidx
 						 */
 	uint64_t 		int_count;
+	uint8_t			toggle;
+	uint8_t			type;
+#define Q_TYPE_TX		1
+#define Q_TYPE_RX		2
 };
 
 struct bnxt_full_tpa_start {
@@ -1005,6 +1027,22 @@ struct bnxt_fw_health {
 
 #define BNXT_GRC_BASE_MASK			0xfffff000
 #define BNXT_GRC_OFFSET_MASK			0x00000ffc
+
+#define NQE_CN_TYPE(type)	((type) & NQ_CN_TYPE_MASK)
+#define NQE_CN_TOGGLE(type)	(((type) & NQ_CN_TOGGLE_MASK) >>        \
+				 NQ_CN_TOGGLE_SFT)
+
+#define DB_EPOCH(ring, idx)	(((idx) & (ring)->db_epoch_mask) <<       \
+				 ((ring)->db_epoch_shift))
+
+#define DB_TOGGLE(tgl)		((tgl) << DBR_TOGGLE_SFT)
+
+#define DB_RING_IDX_CMP(ring, idx)    (((idx) & (ring)->db_ring_mask) |         \
+				       DB_EPOCH(ring, idx))
+
+#define DB_RING_IDX(ring, idx, bit)    (((idx) & (ring)->db_ring_mask) | \
+                                       ((bit) << (24)))
+
 struct bnxt_softc {
 	device_t	dev;
 	if_ctx_t	ctx;
diff --git a/sys/dev/bnxt/bnxt_en/bnxt_txrx.c b/sys/dev/bnxt/bnxt_en/bnxt_txrx.c
index 733db2902a5c..8b2ff6238367 100644
--- a/sys/dev/bnxt/bnxt_en/bnxt_txrx.c
+++ b/sys/dev/bnxt/bnxt_en/bnxt_txrx.c
@@ -98,6 +98,7 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi)
 	uint16_t lflags;
 	uint32_t cfa_meta;
 	int seg = 0;
+	uint8_t wrap = 0;
 
 	/* If we have offloads enabled, we need to use two BDs. */
 	if ((pi->ipi_csum_flags & (CSUM_OFFLOAD | CSUM_TSO | CSUM_IP)) ||
@@ -124,7 +125,18 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi)
 	if (need_hi) {
 		flags_type |= TX_BD_LONG_TYPE_TX_BD_LONG;
 
+		/* Handle wrapping */
+		if (pi->ipi_new_pidx == txr->ring_size - 1)
+			wrap = 1;
+
 		pi->ipi_new_pidx = RING_NEXT(txr, pi->ipi_new_pidx);
+
+		/* Toggle epoch bit on wrap */
+		if (wrap && pi->ipi_new_pidx == 0)
+			txr->epoch_bit = !txr->epoch_bit;
+		if (pi->ipi_new_pidx < EPOCH_ARR_SZ)
+			txr->epoch_arr[pi->ipi_new_pidx] = txr->epoch_bit;
+
 		tbdh = &((struct tx_bd_long_hi *)txr->vaddr)[pi->ipi_new_pidx];
 		tbdh->kid_or_ts_high_mss = htole16(pi->ipi_tso_segsz);
 		tbdh->kid_or_ts_low_hdr_size = htole16((pi->ipi_ehdrlen + pi->ipi_ip_hlen +
@@ -158,7 +170,15 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi)
 
 	for (; seg < pi->ipi_nsegs; seg++) {
 		tbd->flags_type = htole16(flags_type);
+
+		if (pi->ipi_new_pidx == txr->ring_size - 1)
+			wrap = 1;
 		pi->ipi_new_pidx = RING_NEXT(txr, pi->ipi_new_pidx);
+		if (wrap && pi->ipi_new_pidx == 0)
+			txr->epoch_bit = !txr->epoch_bit;
+		if (pi->ipi_new_pidx < EPOCH_ARR_SZ)
+			txr->epoch_arr[pi->ipi_new_pidx] = txr->epoch_bit;
+
 		tbd = &((struct tx_bd_long *)txr->vaddr)[pi->ipi_new_pidx];
 		tbd->len = htole16(pi->ipi_segs[seg].ds_len);
 		tbd->addr = htole64(pi->ipi_segs[seg].ds_addr);
@@ -166,7 +186,13 @@ bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi)
 	}
 	flags_type |= TX_BD_SHORT_FLAGS_PACKET_END;
 	tbd->flags_type = htole16(flags_type);
+	if (pi->ipi_new_pidx == txr->ring_size - 1)
+		wrap = 1;
 	pi->ipi_new_pidx = RING_NEXT(txr, pi->ipi_new_pidx);
+	if (wrap && pi->ipi_new_pidx == 0)
+		txr->epoch_bit = !txr->epoch_bit;
+	if (pi->ipi_new_pidx < EPOCH_ARR_SZ)
+		txr->epoch_arr[pi->ipi_new_pidx] = txr->epoch_bit;
 
 	return 0;
 }
@@ -190,16 +216,21 @@ bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear)
 	struct tx_cmpl *cmpl = (struct tx_cmpl *)cpr->ring.vaddr;
 	int avail = 0;
 	uint32_t cons = cpr->cons;
+	uint32_t raw_cons = cpr->raw_cons;
 	bool v_bit = cpr->v_bit;
 	bool last_v_bit;
 	uint32_t last_cons;
+	uint32_t last_raw_cons;
 	uint16_t type;
 	uint16_t err;
 
 	for (;;) {
 		last_cons = cons;
+		last_raw_cons = raw_cons;
 		last_v_bit = v_bit;
+
 		NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
+		raw_cons++;
 		CMPL_PREFETCH_NEXT(cpr, cons);
 
 		if (!CMP_VALID(&cmpl[cons], v_bit))
@@ -227,8 +258,10 @@ bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear)
 		default:
 			if (type & 1) {
 				NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
-				if (!CMP_VALID(&cmpl[cons], v_bit))
+				raw_cons++;
+				if (!CMP_VALID(&cmpl[cons], v_bit)) {
 					goto done;
+				}
 			}
 			device_printf(softc->dev,
 			    "Unhandled TX completion type %u\n", type);
@@ -239,6 +272,7 @@ done:
 
 	if (clear && avail) {
 		cpr->cons = last_cons;
+		cpr->raw_cons = last_raw_cons;
 		cpr->v_bit = last_v_bit;
 		softc->db_ops.bnxt_db_tx_cq(cpr, 0);
 	}
@@ -285,9 +319,16 @@ bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru)
 		rxbd[pidx].opaque = (((rxqid & 0xff) << 24) | (flid << 16)
 		    | (frag_idxs[i]));
 		rxbd[pidx].addr = htole64(paddrs[i]);
-		if (++pidx == rx_ring->ring_size)
+
+		/* Increment pidx and handle wrap-around */
+		if (++pidx == rx_ring->ring_size) {
 			pidx = 0;
+			rx_ring->epoch_bit = !rx_ring->epoch_bit;
+		}
+		if (pidx < EPOCH_ARR_SZ)
+			rx_ring->epoch_arr[pidx] = rx_ring->epoch_bit;
 	}
+
 	return;
 }
 
@@ -472,6 +513,7 @@ bnxt_pkt_get_l2(struct bnxt_softc *softc, if_rxd_info_t ri,
 
 	/* Now the second 16-byte BD */
 	NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
+	cpr->raw_cons++;
 	ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
 	rcph = &((struct rx_pkt_cmpl_hi *)cpr->ring.vaddr)[cpr->cons];
 
@@ -503,6 +545,7 @@ bnxt_pkt_get_l2(struct bnxt_softc *softc, if_rxd_info_t ri,
 	/* And finally the ag ring stuff. */
 	for (i=1; i < ri->iri_nfrags; i++) {
 		NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
+		cpr->raw_cons++;
 		ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
 		acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons];
 
@@ -553,6 +596,7 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri,
 
 	/* Now the second 16-byte BD */
 	NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
+	cpr->raw_cons++;
 	ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
 
 	flags2 = le32toh(tpas->high.flags2);
@@ -578,6 +622,7 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri,
 	/* Now the ag ring stuff. */
 	for (i=1; i < ri->iri_nfrags; i++) {
 		NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
+		cpr->raw_cons++;
 		ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
 		acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons];
 
@@ -614,6 +659,7 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri)
 
 	for (;;) {
 		NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
+		cpr->raw_cons++;
 		ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
 		CMPL_PREFETCH_NEXT(cpr, cpr->cons);
 		cmp = &((struct cmpl_base *)cpr->ring.vaddr)[cpr->cons];
@@ -636,6 +682,7 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri)
 			softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].low = *rtpa;
 
 			NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
+			cpr->raw_cons++;
 			ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
 			CMPL_PREFETCH_NEXT(cpr, cpr->cons);
 
@@ -649,6 +696,7 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri)
 			if (type & 1) {
 				NEXT_CP_CONS_V(&cpr->ring, cpr->cons,
 				    cpr->v_bit);
+				cpr->raw_cons++;
 				ri->iri_cidx = RING_NEXT(&cpr->ring,
 				    ri->iri_cidx);
 				CMPL_PREFETCH_NEXT(cpr, cpr->cons);
diff --git a/sys/dev/bnxt/bnxt_en/if_bnxt.c b/sys/dev/bnxt/bnxt_en/if_bnxt.c
index e00f59fd390e..8960866fcf90 100644
--- a/sys/dev/bnxt/bnxt_en/if_bnxt.c
+++ b/sys/dev/bnxt/bnxt_en/if_bnxt.c
@@ -428,6 +428,18 @@ bnxt_nq_free(struct bnxt_softc *softc)
 	softc->nq_rings = NULL;
 }
 
+
+static void
+bnxt_set_db_mask(struct bnxt_softc *bp, struct bnxt_ring *db,
+		 u32 ring_type)
+{
+	if (BNXT_CHIP_P7(bp)) {
+		db->db_epoch_mask = db->db_ring_mask + 1;
+		db->db_epoch_shift = DBR_EPOCH_SFT - ilog2(db->db_epoch_mask);
+
+	}
+}
+
 /*
  * Device Dependent Configuration Functions
 */
@@ -492,6 +504,8 @@ bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 			softc->legacy_db_size: softc->tx_cp_rings[i].ring.id * 0x80;
 		softc->tx_cp_rings[i].ring.ring_size =
 		    softc->scctx->isc_ntxd[0];
+		softc->tx_cp_rings[i].ring.db_ring_mask =
+		    softc->tx_cp_rings[i].ring.ring_size - 1;
 		softc->tx_cp_rings[i].ring.vaddr = vaddrs[i * ntxqs];
 		softc->tx_cp_rings[i].ring.paddr = paddrs[i * ntxqs];
 
@@ -505,6 +519,7 @@ bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 		softc->tx_rings[i].doorbell = (BNXT_CHIP_P5_PLUS(softc)) ?
 			softc->legacy_db_size : softc->tx_rings[i].id * 0x80;
 		softc->tx_rings[i].ring_size = softc->scctx->isc_ntxd[1];
+		softc->tx_rings[i].db_ring_mask = softc->tx_rings[i].ring_size - 1;
 		softc->tx_rings[i].vaddr = vaddrs[i * ntxqs + 1];
 		softc->tx_rings[i].paddr = paddrs[i * ntxqs + 1];
 
@@ -521,8 +536,10 @@ bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 			softc->nq_rings[i].ring.doorbell = (BNXT_CHIP_P5_PLUS(softc)) ?
 				softc->legacy_db_size : softc->nq_rings[i].ring.id * 0x80;
 			softc->nq_rings[i].ring.ring_size = softc->scctx->isc_ntxd[2];
+			softc->nq_rings[i].ring.db_ring_mask = softc->nq_rings[i].ring.ring_size - 1;
 			softc->nq_rings[i].ring.vaddr = vaddrs[i * ntxqs + 2];
 			softc->nq_rings[i].ring.paddr = paddrs[i * ntxqs + 2];
+			softc->nq_rings[i].type = Q_TYPE_TX;
 		}
 	}
 
@@ -684,6 +701,8 @@ bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 		 */
 		softc->rx_cp_rings[i].ring.ring_size =
 		    softc->scctx->isc_nrxd[0];
+		softc->rx_cp_rings[i].ring.db_ring_mask =
+		    softc->rx_cp_rings[i].ring.ring_size - 1;
 
 		softc->rx_cp_rings[i].ring.vaddr = vaddrs[i * nrxqs];
 		softc->rx_cp_rings[i].ring.paddr = paddrs[i * nrxqs];
@@ -696,6 +715,8 @@ bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 		softc->rx_rings[i].doorbell = (BNXT_CHIP_P5_PLUS(softc)) ?
 			softc->legacy_db_size : softc->rx_rings[i].id * 0x80;
 		softc->rx_rings[i].ring_size = softc->scctx->isc_nrxd[1];
+		softc->rx_rings[i].db_ring_mask =
+			softc->rx_rings[i].ring_size -1;
 		softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1];
 		softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1];
 
@@ -717,6 +738,7 @@ bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 		softc->ag_rings[i].doorbell = (BNXT_CHIP_P5_PLUS(softc)) ?
 			softc->legacy_db_size : softc->ag_rings[i].id * 0x80;
 		softc->ag_rings[i].ring_size = softc->scctx->isc_nrxd[2];
+		softc->ag_rings[i].db_ring_mask = softc->ag_rings[i].ring_size - 1;
 		softc->ag_rings[i].vaddr = vaddrs[i * nrxqs + 2];
 		softc->ag_rings[i].paddr = paddrs[i * nrxqs + 2];
 
@@ -1414,6 +1436,141 @@ static void bnxt_thor_db_nq(void *db_ptr, bool enable_irq)
 			BUS_SPACE_BARRIER_WRITE);
 }
 
+static void
+bnxt_thor2_db_rx(void *db_ptr, uint16_t idx)
+{
+	struct bnxt_ring *ring = (struct bnxt_ring *) db_ptr;
+	struct bnxt_bar_info *db_bar = &ring->softc->doorbell_bar;
+	uint64_t db_val;
+
+	if (idx >= ring->ring_size) {
+		device_printf(ring->softc->dev, "%s: BRCM DBG: idx: %d crossed boundary\n", __func__, idx);
+		return;
+	}
+
+	db_val = ((DBR_PATH_L2 | DBR_TYPE_SRQ | DBR_VALID | idx) |
+				((uint64_t)ring->phys_id << DBR_XID_SFT));
+
+	/* Add the PI index */
+	db_val |= DB_RING_IDX(ring, idx, ring->epoch_arr[idx]);
+
+	bus_space_barrier(db_bar->tag, db_bar->handle, ring->doorbell, 8,
+			BUS_SPACE_BARRIER_WRITE);
+	bus_space_write_8(db_bar->tag, db_bar->handle, ring->doorbell,
+			htole64(db_val));
+}
+
+static void
+bnxt_thor2_db_tx(void *db_ptr, uint16_t idx)
+{
+	struct bnxt_ring *ring = (struct bnxt_ring *) db_ptr;
+	struct bnxt_bar_info *db_bar = &ring->softc->doorbell_bar;
+	uint64_t db_val;
+
+	if (idx >= ring->ring_size) {
+		device_printf(ring->softc->dev, "%s: BRCM DBG: idx: %d crossed boundary\n", __func__, idx);
+		return;
+	}
+
+	db_val = ((DBR_PATH_L2 | DBR_TYPE_SQ | DBR_VALID | idx) |
+				((uint64_t)ring->phys_id << DBR_XID_SFT));
+
+	/* Add the PI index */
+	db_val |= DB_RING_IDX(ring, idx, ring->epoch_arr[idx]);
+
+	bus_space_barrier(db_bar->tag, db_bar->handle, ring->doorbell, 8,
+			BUS_SPACE_BARRIER_WRITE);
+	bus_space_write_8(db_bar->tag, db_bar->handle, ring->doorbell,
+			htole64(db_val));
+}
+
+static void
+bnxt_thor2_db_rx_cq(void *db_ptr, bool enable_irq)
+{
+	struct bnxt_cp_ring *cpr = (struct bnxt_cp_ring *) db_ptr;
+	struct bnxt_bar_info *db_bar = &cpr->ring.softc->doorbell_bar;
+	u64 db_msg = { 0 };
+	uint32_t cons = cpr->raw_cons;
+	uint32_t toggle = 0;
+
+	if (cons == UINT32_MAX)
+		cons = 0;
+
+	if (enable_irq == true)
+		toggle = cpr->toggle;
+
+	db_msg = DBR_PATH_L2 | ((u64)cpr->ring.phys_id << DBR_XID_SFT) | DBR_VALID |
+			DB_RING_IDX_CMP(&cpr->ring, cons) | DB_TOGGLE(toggle);
+
+	if (enable_irq)
+		db_msg |= DBR_TYPE_CQ_ARMALL;
+	else
+		db_msg |= DBR_TYPE_CQ;
+
+	bus_space_barrier(db_bar->tag, db_bar->handle, cpr->ring.doorbell, 8,
+			BUS_SPACE_BARRIER_WRITE);
+	bus_space_write_8(db_bar->tag, db_bar->handle, cpr->ring.doorbell,
+			htole64(*(uint64_t *)&db_msg));
+	bus_space_barrier(db_bar->tag, db_bar->handle, 0, db_bar->size,
+			BUS_SPACE_BARRIER_WRITE);
+}
+
+static void
+bnxt_thor2_db_tx_cq(void *db_ptr, bool enable_irq)
+{
+	struct bnxt_cp_ring *cpr = (struct bnxt_cp_ring *) db_ptr;
+	struct bnxt_bar_info *db_bar = &cpr->ring.softc->doorbell_bar;
+	u64 db_msg = { 0 };
+	uint32_t cons = cpr->raw_cons;
+	uint32_t toggle = 0;
+
+	if (enable_irq == true)
+		toggle = cpr->toggle;
+
+	db_msg = DBR_PATH_L2 | ((u64)cpr->ring.phys_id << DBR_XID_SFT) | DBR_VALID |
+			DB_RING_IDX_CMP(&cpr->ring, cons) | DB_TOGGLE(toggle);
+
+	if (enable_irq)
+		db_msg |= DBR_TYPE_CQ_ARMALL;
+	else
+		db_msg |= DBR_TYPE_CQ;
+
+	bus_space_barrier(db_bar->tag, db_bar->handle, cpr->ring.doorbell, 8,
+			BUS_SPACE_BARRIER_WRITE);
+	bus_space_write_8(db_bar->tag, db_bar->handle, cpr->ring.doorbell,
+			htole64(*(uint64_t *)&db_msg));
+	bus_space_barrier(db_bar->tag, db_bar->handle, 0, db_bar->size,
+			BUS_SPACE_BARRIER_WRITE);
+}
+
+static void
+bnxt_thor2_db_nq(void *db_ptr, bool enable_irq)
+{
+	struct bnxt_cp_ring *cpr = (struct bnxt_cp_ring *) db_ptr;
+	struct bnxt_bar_info *db_bar = &cpr->ring.softc->doorbell_bar;
+	u64 db_msg = { 0 };
+	uint32_t cons = cpr->raw_cons;
+	uint32_t toggle = 0;
+
+	if (enable_irq == true)
+		toggle = cpr->toggle;
+
+	db_msg = DBR_PATH_L2 | ((u64)cpr->ring.phys_id << DBR_XID_SFT) | DBR_VALID |
+			DB_RING_IDX_CMP(&cpr->ring, cons) | DB_TOGGLE(toggle);
+
+	if (enable_irq)
+		db_msg |= DBR_TYPE_NQ_ARM;
+	else
+		db_msg |= DBR_TYPE_NQ_MASK;
+
+	bus_space_barrier(db_bar->tag, db_bar->handle, cpr->ring.doorbell, 8,
+			BUS_SPACE_BARRIER_WRITE);
+	bus_space_write_8(db_bar->tag, db_bar->handle, cpr->ring.doorbell,
+			htole64(*(uint64_t *)&db_msg));
+	bus_space_barrier(db_bar->tag, db_bar->handle, 0, db_bar->size,
+			BUS_SPACE_BARRIER_WRITE);
+}
+
 struct bnxt_softc *bnxt_find_dev(uint32_t domain, uint32_t bus, uint32_t dev_fn, char *dev_name)
 {
 	struct bnxt_softc_list *sc = NULL;
@@ -2295,6 +2452,12 @@ bnxt_attach_pre(if_ctx_t ctx)
 		softc->db_ops.bnxt_db_rx_cq = bnxt_thor_db_rx_cq;
 		softc->db_ops.bnxt_db_tx_cq = bnxt_thor_db_tx_cq;
 		softc->db_ops.bnxt_db_nq = bnxt_thor_db_nq;
+	} else if (BNXT_CHIP_P7(softc)) {
+		softc->db_ops.bnxt_db_tx = bnxt_thor2_db_tx;
+		softc->db_ops.bnxt_db_rx = bnxt_thor2_db_rx;
+		softc->db_ops.bnxt_db_rx_cq = bnxt_thor2_db_rx_cq;
+		softc->db_ops.bnxt_db_tx_cq = bnxt_thor2_db_tx_cq;
+		softc->db_ops.bnxt_db_nq = bnxt_thor2_db_nq;
 	} else {
 		softc->db_ops.bnxt_db_tx = bnxt_cuw_db_tx;
 		softc->db_ops.bnxt_db_rx = bnxt_cuw_db_rx;
@@ -2455,6 +2618,7 @@ bnxt_attach_pre(if_ctx_t ctx)
 		softc->legacy_db_size : softc->def_cp_ring.ring.id * 0x80;
 	softc->def_cp_ring.ring.ring_size = PAGE_SIZE /
 	    sizeof(struct cmpl_base);
+	softc->def_cp_ring.ring.db_ring_mask = softc->def_cp_ring.ring.ring_size -1 ;
 	rc = iflib_dma_alloc(ctx,
 	    sizeof(struct cmpl_base) * softc->def_cp_ring.ring.ring_size,
 	    &softc->def_cp_ring_mem, 0);
@@ -2872,6 +3036,8 @@ bnxt_init(if_ctx_t ctx)
 	rc = bnxt_hwrm_ring_alloc(softc,
 			HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL,
 			&softc->def_cp_ring.ring);
+	bnxt_set_db_mask(softc, &softc->def_cp_ring.ring,
+			HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL);
 	if (rc)
 		goto fail;
 skip_def_cp_ring:
@@ -2882,15 +3048,18 @@ skip_def_cp_ring:
 		if (rc)
 			goto fail;
 
-		if (BNXT_CHIP_P5(softc)) {
+		if (BNXT_CHIP_P5_PLUS(softc)) {
 			/* Allocate the NQ */
 			softc->nq_rings[i].cons = 0;
+			softc->nq_rings[i].raw_cons = 0;
 			softc->nq_rings[i].v_bit = 1;
 			softc->nq_rings[i].last_idx = UINT32_MAX;
 			bnxt_mark_cpr_invalid(&softc->nq_rings[i]);
 			rc = bnxt_hwrm_ring_alloc(softc,
 					HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ,
 					&softc->nq_rings[i].ring);
+			bnxt_set_db_mask(softc, &softc->nq_rings[i].ring,
+					HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ);
 			if (rc)
 				goto fail;
 
@@ -2898,21 +3067,27 @@ skip_def_cp_ring:
 		}
 		/* Allocate the completion ring */
 		softc->rx_cp_rings[i].cons = UINT32_MAX;
+		softc->rx_cp_rings[i].raw_cons = UINT32_MAX;
 		softc->rx_cp_rings[i].v_bit = 1;
 		softc->rx_cp_rings[i].last_idx = UINT32_MAX;
+		softc->rx_cp_rings[i].toggle = 0;
 		bnxt_mark_cpr_invalid(&softc->rx_cp_rings[i]);
 		rc = bnxt_hwrm_ring_alloc(softc,
 				HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL,
 				&softc->rx_cp_rings[i].ring);
+		bnxt_set_db_mask(softc, &softc->rx_cp_rings[i].ring,
+				HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL);
 		if (rc)
 			goto fail;
 
-		if (BNXT_CHIP_P5(softc))
+		if (BNXT_CHIP_P5_PLUS(softc))
 			softc->db_ops.bnxt_db_rx_cq(&softc->rx_cp_rings[i], 1);
 
 		/* Allocate the RX ring */
 		rc = bnxt_hwrm_ring_alloc(softc,
 		    HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->rx_rings[i]);
+		bnxt_set_db_mask(softc, &softc->rx_rings[i],
+				HWRM_RING_ALLOC_INPUT_RING_TYPE_RX);
 		if (rc)
 			goto fail;
 		softc->db_ops.bnxt_db_rx(&softc->rx_rings[i], 0);
@@ -2921,6 +3096,8 @@ skip_def_cp_ring:
 		rc = bnxt_hwrm_ring_alloc(softc,
 				HWRM_RING_ALLOC_INPUT_RING_TYPE_RX_AGG,
 				&softc->ag_rings[i]);
+		bnxt_set_db_mask(softc, &softc->ag_rings[i],
+				HWRM_RING_ALLOC_INPUT_RING_TYPE_RX_AGG);
 		if (rc)
 			goto fail;
 		softc->db_ops.bnxt_db_rx(&softc->ag_rings[i], 0);
@@ -2983,21 +3160,27 @@ skip_def_cp_ring:
 
 		/* Allocate the completion ring */
 		softc->tx_cp_rings[i].cons = UINT32_MAX;
+		softc->tx_cp_rings[i].raw_cons = UINT32_MAX;
 		softc->tx_cp_rings[i].v_bit = 1;
+		softc->tx_cp_rings[i].toggle = 0;
 		bnxt_mark_cpr_invalid(&softc->tx_cp_rings[i]);
 		rc = bnxt_hwrm_ring_alloc(softc,
 				HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL,
 				&softc->tx_cp_rings[i].ring);
+		bnxt_set_db_mask(softc, &softc->tx_cp_rings[i].ring,
+				HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL);
 		if (rc)
 			goto fail;
 
-		if (BNXT_CHIP_P5(softc))
+		if (BNXT_CHIP_P5_PLUS(softc))
 			softc->db_ops.bnxt_db_tx_cq(&softc->tx_cp_rings[i], 1);
 
 		/* Allocate the TX ring */
 		rc = bnxt_hwrm_ring_alloc(softc,
 				HWRM_RING_ALLOC_INPUT_RING_TYPE_TX,
 				&softc->tx_rings[i]);
+		bnxt_set_db_mask(softc, &softc->tx_rings[i],
+				HWRM_RING_ALLOC_INPUT_RING_TYPE_TX);
 		if (rc)
 			goto fail;
 		softc->db_ops.bnxt_db_tx(&softc->tx_rings[i], 0);
@@ -3568,25 +3751,35 @@ process_nq(struct bnxt_softc *softc, uint16_t nqid)
 {
 	struct bnxt_cp_ring *cpr = &softc->nq_rings[nqid];
 	nq_cn_t *cmp = (nq_cn_t *) cpr->ring.vaddr;
+	struct bnxt_cp_ring *tx_cpr = &softc->tx_cp_rings[nqid];
+	struct bnxt_cp_ring *rx_cpr = &softc->rx_cp_rings[nqid];
 	bool v_bit = cpr->v_bit;
 	uint32_t cons = cpr->cons;
+	uint32_t raw_cons = cpr->raw_cons;
 	uint16_t nq_type, nqe_cnt = 0;
 
 	while (1) {
-		if (!NQ_VALID(&cmp[cons], v_bit))
+		if (!NQ_VALID(&cmp[cons], v_bit)) {
 			goto done;
+		}
 
 		nq_type = NQ_CN_TYPE_MASK & cmp[cons].type;
 
-		if (nq_type != NQ_CN_TYPE_CQ_NOTIFICATION)
+		if (NQE_CN_TYPE(nq_type) != NQ_CN_TYPE_CQ_NOTIFICATION) {
 			 bnxt_process_async_msg(cpr, (tx_cmpl_t *)&cmp[cons]);
+		} else {
+			tx_cpr->toggle = NQE_CN_TOGGLE(cmp[cons].type);
+			rx_cpr->toggle = NQE_CN_TOGGLE(cmp[cons].type);
+		}
 
 		NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
+		raw_cons++;
 		nqe_cnt++;
 	}
 done:
 	if (nqe_cnt) {
 		cpr->cons = cons;
+		cpr->raw_cons = raw_cons;
 		cpr->v_bit = v_bit;
 	}
 }