git: af2e102c4065 - main - rtwn: enable periodic TX reporting support on RTL8188EU NICs.

From: Adrian Chadd <adrian_at_FreeBSD.org>
Date: Tue, 31 Dec 2024 19:13:32 UTC
The branch main has been updated by adrian:

URL: https://cgit.FreeBSD.org/src/commit/?id=af2e102c40652156c89029177da7961165ffe4cc

commit af2e102c40652156c89029177da7961165ffe4cc
Author:     Adrian Chadd <adrian@FreeBSD.org>
AuthorDate: 2024-12-15 04:38:18 +0000
Commit:     Adrian Chadd <adrian@FreeBSD.org>
CommitDate: 2024-12-31 19:11:33 +0000

    rtwn: enable periodic TX reporting support on RTL8188EU NICs.
    
    The RTL8188E firmware doesn't have the "full" offload firmware
    rate control.  Instead, the vendor driver has a bunch of logic
    in the driver for rate probing and selection.
    
    Part of this is the periodic TX report - which uploads a summary
    of multi-rate retries and drops per MACID.  Using it drastically
    cuts down on the TX notifications - it's fired from a timer
    (defaulting to ~1.6 seconds) and arrives as a single receive frame
    in the normal bulk RX path.
    
    I've not ported / reimplemented the whole vendor driver rate adaptation
    code - instead, I'm just using the normal net80211 rate control APIs.
    It seems to behave OK - I get 25-30 Mbit/s down and 20 Mbit/s up using
    a TCP speedtest.
    
    Locally tested:
    
    * RTL8188EU, STA mode
    
    Differential Revision:   https://reviews.freebsd.org/D48088
    Reviewed by:    fuz, bz
    Obtained from:  https://github.com/lwfinger/rtl8188eu/blob/master/hal/Hal8188ERateAdaptive.c
---
 sys/dev/rtwn/if_rtwnvar.h                |  1 +
 sys/dev/rtwn/rtl8188e/r88e.h             |  1 +
 sys/dev/rtwn/rtl8188e/r88e_rx.c          | 86 ++++++++++++++++++++++++++++++++
 sys/dev/rtwn/rtl8188e/r88e_rx_desc.h     | 14 ++++++
 sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c |  4 +-
 sys/dev/rtwn/rtl8188e/usb/r88eu_init.c   | 15 +++++-
 sys/dev/rtwn/rtl8192c/r92c_tx.c          | 23 ++++++++-
 7 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/sys/dev/rtwn/if_rtwnvar.h b/sys/dev/rtwn/if_rtwnvar.h
index 570403747392..3913526f8c3c 100644
--- a/sys/dev/rtwn/if_rtwnvar.h
+++ b/sys/dev/rtwn/if_rtwnvar.h
@@ -401,6 +401,7 @@ struct rtwn_softc {
 	uint16_t			rx_dma_size;
 
 	int				macid_limit;
+	int				macid_rpt2_max_num;
 	int				cam_entry_limit;
 	int				fwsize_limit;
 	int				temp_delta;
diff --git a/sys/dev/rtwn/rtl8188e/r88e.h b/sys/dev/rtwn/rtl8188e/r88e.h
index 488e6ea79d3f..6569b014a5c6 100644
--- a/sys/dev/rtwn/rtl8188e/r88e.h
+++ b/sys/dev/rtwn/rtl8188e/r88e.h
@@ -86,6 +86,7 @@ int8_t	r88e_get_rssi_cck(struct rtwn_softc *, void *);
 int8_t	r88e_get_rssi_ofdm(struct rtwn_softc *, void *);
 void	r88e_get_rx_stats(struct rtwn_softc *, struct ieee80211_rx_stats *,
 	    const void *, const void *);
+void	r88e_ratectl_tx_complete_periodic(struct rtwn_softc *, uint8_t *, int);
 
 /* r88e_tx.c */
 void	r88e_tx_enable_ampdu(void *, int);
diff --git a/sys/dev/rtwn/rtl8188e/r88e_rx.c b/sys/dev/rtwn/rtl8188e/r88e_rx.c
index 287869885b86..2ff0ee4dae00 100644
--- a/sys/dev/rtwn/rtl8188e/r88e_rx.c
+++ b/sys/dev/rtwn/rtl8188e/r88e_rx.c
@@ -232,3 +232,89 @@ r88e_get_rx_stats(struct rtwn_softc *sc, struct ieee80211_rx_stats *rxs,
 		rxs->c_band = IEEE80211_CHAN_2GHZ;
 	}
 }
+
+/*
+ * Handle a periodic (type 2) TX report: pass the retry/drop counters of
+ * each macid flagged in the report bitmap to net80211 rate control.
+ */
+void
+r88e_ratectl_tx_complete_periodic(struct rtwn_softc *sc, uint8_t *buf,
+    int len)
+{
+	const struct r92c_rx_stat *rxs;
+	uint64_t mac_bitmap;
+	int macid;
+
+	/* Compare as int: a negative len must not be promoted to size_t. */
+	if (len < (int)sizeof(struct r92c_rx_stat))
+		return;
+
+	rxs = (const struct r92c_rx_stat *) buf;
+
+	/* Skip Rx descriptor. */
+	buf += sizeof(struct r92c_rx_stat);
+	len -= sizeof(struct r92c_rx_stat);
+
+	/*
+	 * Note: the valid macid bitmap is rx_desc[5] << 32 | rx_desc[4];
+	 * Note: rx_desc[5] is the TSF, which isn't valid for this report!
+	 */
+	mac_bitmap = ((uint64_t) le32toh(rxs->tsf_low) << 32)
+	    | le32toh(rxs->rxdw4);
+
+	/* %jx/uintmax_t: uint64_t is not 'long' on 32-bit platforms. */
+	RTWN_DPRINTF(sc, RTWN_DEBUG_RA, "%s: mac bitmap: 0x%jx\n",
+	    __func__, (uintmax_t)mac_bitmap);
+
+	/*
+	 * Note: the RX reports aren't sparse - invalid entries (ie,
+	 * the bitmap has the macid set to 0) are just populated
+	 * with random data.
+	 */
+	for (macid = 0; (macid < 64) && (macid < sc->macid_rpt2_max_num) &&
+	    (len >= sizeof(struct r88e_fw_c2h_txreport2_entry)); macid++) {
+		struct ieee80211_ratectl_tx_stats txs = { 0 };
+		const struct r88e_fw_c2h_txreport2_entry *rpt;
+		uint32_t ntotal, nsuccess, ndrop, nretry, nframes;
+
+		rpt = (const struct r88e_fw_c2h_txreport2_entry *) buf;
+		buf += sizeof(struct r88e_fw_c2h_txreport2_entry);
+		len -= sizeof(struct r88e_fw_c2h_txreport2_entry);
+
+		/* 64-bit one: 1UL is only 32 bits wide on ILP32. */
+		if ((mac_bitmap & ((uint64_t)1 << macid)) == 0)
+			continue;
+
+		txs.flags = IEEE80211_RATECTL_TX_STATS_NODE |
+			    IEEE80211_RATECTL_TX_STATS_RETRIES;
+
+		/* calculate all the various combinations of things */
+		nframes = le16toh(rpt->retry0);
+		ntotal = nframes + rpt->retry1 + rpt->retry2
+		    + rpt->retry3 + rpt->retry4 + rpt->drop;
+		/*
+		 * Note: sometimes this is zero or 1, but the retries
+		 * are all capped out at 255!  That means the frame
+		 * transmits are all failing.
+		 */
+		nsuccess = ntotal - rpt->drop;
+		ndrop = rpt->drop;
+		nretry = rpt->retry1 + rpt->retry2 + rpt->retry3
+		    + rpt->retry4;
+
+		txs.nretries = nretry + ndrop;
+		txs.nsuccess = nsuccess;
+		txs.nframes = ntotal;
+
+		RTWN_DPRINTF(sc, RTWN_DEBUG_RA,
+		    "%s: MAC %d rpt retries %d %d %d %d %d, drop %d\n",
+		    __func__, macid, le16toh(rpt->retry0), rpt->retry1,
+		    rpt->retry2, rpt->retry3, rpt->retry4, rpt->drop);
+		if (sc->node_list[macid] != NULL) {
+			struct ieee80211_node *ni;
+			ni = sc->node_list[macid];
+			txs.ni = ni;
+			ieee80211_ratectl_tx_update(ni->ni_vap, &txs);
+		}
+	}
+}
diff --git a/sys/dev/rtwn/rtl8188e/r88e_rx_desc.h b/sys/dev/rtwn/rtl8188e/r88e_rx_desc.h
index f3e1a3c1b9bc..59e885eb4821 100644
--- a/sys/dev/rtwn/rtl8188e/r88e_rx_desc.h
+++ b/sys/dev/rtwn/rtl8188e/r88e_rx_desc.h
@@ -81,6 +81,20 @@ struct r88e_tx_rpt_ccx {
 	uint8_t		rptb7;
 } __packed;
 
+/*
+ * One per-macid entry of the 8188E periodic TX report (type 2 report);
+ * entries follow the Rx descriptor, one for each macid in order.
+ */
+struct r88e_fw_c2h_txreport2_entry {
+	uint16_t retry0;	/* little-endian; read via le16toh() */
+	uint8_t retry1;
+	uint8_t retry2;
+	uint8_t retry3;
+	uint8_t retry4;
+	uint8_t drop;		/* counted as failed transmissions */
+	uint8_t reserved;
+} __packed;
+
 /* Interrupt message format. */
 /* XXX recheck */
 struct r88e_intr_msg {
diff --git a/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c b/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c
index 2d4713e92bd2..9ace2396d712 100644
--- a/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c
+++ b/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c
@@ -138,7 +138,7 @@ r88eu_attach(struct rtwn_usb_softc *uc)
 	sc->sc_get_rssi_ofdm		= r88e_get_rssi_ofdm;
 	sc->sc_classify_intr		= r88e_classify_intr;
 	sc->sc_handle_tx_report		= r88e_ratectl_tx_complete;
-	sc->sc_handle_tx_report2	= rtwn_nop_softc_uint8_int;
+	sc->sc_handle_tx_report2	= r88e_ratectl_tx_complete_periodic;
 	sc->sc_handle_c2h_report	= r88e_handle_c2h_report;
 	sc->sc_check_frame		= rtwn_nop_int_softc_mbuf;
 	sc->sc_rf_read			= r92c_rf_read;
@@ -212,6 +212,8 @@ r88eu_attach(struct rtwn_usb_softc *uc)
 	sc->rx_dma_size			= R88E_RX_DMA_BUFFER_SIZE;
 
 	sc->macid_limit			= R88E_MACID_MAX + 1;
+	/* XXX this limit may be expanded to R88E_MACID_MAX */
+	sc->macid_rpt2_max_num		= 2;
 	sc->cam_entry_limit		= R92C_CAM_ENTRY_COUNT;
 	sc->fwsize_limit		= R92C_MAX_FW_SIZE;
 	sc->temp_delta			= R88E_CALIB_THRESHOLD;
diff --git a/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c b/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c
index f4f936493cda..312e437958ec 100644
--- a/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c
+++ b/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c
@@ -279,9 +279,22 @@ void
 r88eu_post_init(struct rtwn_softc *sc)
 {
 
-	/* Enable per-packet TX report. */
+	/* Enable per-packet TX report (RPT1) */
 	rtwn_setbits_1(sc, R88E_TX_RPT_CTRL, 0, R88E_TX_RPT1_ENA);
 
+#ifndef RTWN_WITHOUT_UCODE
+	/* Enable timer report (RPT2) if requested */
+	if (sc->macid_rpt2_max_num > 0) {
+		rtwn_setbits_1(sc, R88E_TX_RPT_CTRL, 0,
+		    R88E_TX_RPT2_ENA);
+
+		/* Configure how many TX RPT2 entries to populate */
+		rtwn_write_1(sc, R88E_TX_RPT_MACID_MAX,
+		    sc->macid_rpt2_max_num);
+		/* Enable periodic TX report; 32uS units */
+		rtwn_write_2(sc, R88E_TX_RPT_TIME, 0xcdf0);
+	}
+#endif
 	/* Disable Tx if MACID is not associated. */
 	rtwn_write_4(sc, R88E_MACID_NO_LINK, 0xffffffff);
 	rtwn_write_4(sc, R88E_MACID_NO_LINK + 4, 0xffffffff);
diff --git a/sys/dev/rtwn/rtl8192c/r92c_tx.c b/sys/dev/rtwn/rtl8192c/r92c_tx.c
index c60081fc675c..07a6a184e924 100644
--- a/sys/dev/rtwn/rtl8192c/r92c_tx.c
+++ b/sys/dev/rtwn/rtl8192c/r92c_tx.c
@@ -236,6 +236,27 @@ r92c_calculate_tx_agg_window(struct rtwn_softc *sc,
 	return (wnd);
 }
 
+/*
+ * Decide whether a frame for this macid should request the per-packet
+ * TX CCX report.
+ *
+ * On chipsets with RPT2 (periodic) reports, a frame that requests the
+ * CCX report is left out of the RPT2 counts, so skip it for those macids.
+ */
+static bool
+r92c_check_enable_ccx_report(struct rtwn_softc *sc, int macid)
+{
+#ifndef RTWN_WITHOUT_UCODE
+	const int rpt2_limit = sc->macid_rpt2_max_num;
+
+	/* This macid is already covered by the periodic report. */
+	if (rpt2_limit != 0 && macid < rpt2_limit)
+		return (false);
+#endif
+	/* Otherwise request it only under net80211 rate control. */
+	return (sc->sc_ratectl == RTWN_RATECTL_NET80211);
+}
+
 void
 r92c_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni,
     struct mbuf *m, void *buf, uint8_t ridx, int maxretry)
@@ -298,7 +319,7 @@ r92c_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni,
 				txd->txdw6 |= htole32(SM(R92C_TXDW6_MAX_AGG,
 				    r92c_calculate_tx_agg_window(sc, ni, tid)));
 			}
-			if (sc->sc_ratectl == RTWN_RATECTL_NET80211) {
+			if (r92c_check_enable_ccx_report(sc, macid)) {
 				txd->txdw2 |= htole32(R92C_TXDW2_CCX_RPT);
 				sc->sc_tx_n_active++;
 #ifndef RTWN_WITHOUT_UCODE