git: 49f12d5b38f6 - stable/14 - e1000: Re-add AIM

From: Kevin Bowling <kbowling_at_FreeBSD.org>
Date: Fri, 18 Oct 2024 01:42:14 UTC
The branch stable/14 has been updated by kbowling:

URL: https://cgit.FreeBSD.org/src/commit/?id=49f12d5b38f6bbc66a81438b9bc7ed21af3be39c

commit 49f12d5b38f6bbc66a81438b9bc7ed21af3be39c
Author:     Kevin Bowling <kbowling@FreeBSD.org>
AuthorDate: 2024-09-22 09:26:05 +0000
Commit:     Kevin Bowling <kbowling@FreeBSD.org>
CommitDate: 2024-10-18 01:41:56 +0000

    e1000: Re-add AIM
    
    We originally left this out because iflib modulates interrupts and
    accomplishes some level of batching versus the custom queues in the
    older driver. Upon more detailed study of the Linux driver, which has a
    newer implementation, it finally became clear to me that the ITR value
    is actually a holdoff timer and not an interrupt limit, even though it
    is conventionally (statically) programmed and displayed as an interrupt
    rate. The data sheets also make this somewhat clear.
    
    Thus, AIM accomplishes two beneficial things for a wide variety of
    workloads[1]:
    
    1. At low throughput/packet rates, it will significantly lower latency
    (by counter-intuitively "increasing" the interrupt rate; this is better
    thought of as decreasing the holdoff timer, because you will modulate
    down before coming anywhere near these interrupt rates).
    2. At bulk data rates, it is tuned to achieve a lower interrupt rate
    (by increasing the holdoff timer) than the current static 8000/s. This
    decreases processing overhead and yields more headroom for other work
    such as packet filters or userland.
    
    For a single NIC this might be worth a few sys% on common CPUs, but may
    be meaningful when multiplied such as if_lagg, if_bridge and forwarding
    setups.
    
    The AIM algorithm was re-introduced from the older igb(4) or out-of-tree
    driver, and then modernized with permission to use Intel code from other
    drivers.
    
    I have retroactively added it to lem(4) and em(4) where the same concept
    applies, albeit to a single ITR register.
    
    [1]: http://iommu.com/datasheets/ethernet/controllers-nics/intel/e1000/gbe-controllers-interrupt-moderation-appl-note.pdf
    
    Tested by:      cc (https://wiki.freebsd.org/chengcui/testD46768)
    Relnotes:       yes
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Sponsored by:   BBOX.io
    Differential Revision:  https://reviews.freebsd.org/D46768
    
    (cherry picked from commit 3e501ef896671cb190e8c40c6258b8f27d136f07)
---
 sys/dev/e1000/em_txrx.c  |  10 ++
 sys/dev/e1000/if_em.c    | 269 ++++++++++++++++++++++++++++++++++++++++++++++-
 sys/dev/e1000/if_em.h    |  28 +++--
 sys/dev/e1000/igb_txrx.c |   4 +
 4 files changed, 299 insertions(+), 12 deletions(-)

diff --git a/sys/dev/e1000/em_txrx.c b/sys/dev/e1000/em_txrx.c
index eec198df7466..6e8fff07cd82 100644
--- a/sys/dev/e1000/em_txrx.c
+++ b/sys/dev/e1000/em_txrx.c
@@ -455,6 +455,10 @@ em_isc_txd_encap(void *arg, if_pkt_info_t pi)
 	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
 	pi->ipi_new_pidx = i;
 
+	/* Sent data accounting for AIM */
+	txr->tx_bytes += pi->ipi_len;
+	++txr->tx_packets;
+
 	return (0);
 }
 
@@ -669,6 +673,7 @@ lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 
 		len = le16toh(rxd->length);
 		ri->iri_len += len;
+		rxr->rx_bytes += ri->iri_len;
 
 		eop = (status & E1000_RXD_STAT_EOP) != 0;
 
@@ -690,6 +695,8 @@ lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 		i++;
 	} while (!eop);
 
+	rxr->rx_packets++;
+
 	if (scctx->isc_capenable & IFCAP_RXCSUM)
 		em_receive_checksum(status, errors, ri);
 
@@ -732,6 +739,7 @@ em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 
 		len = le16toh(rxd->wb.upper.length);
 		ri->iri_len += len;
+		rxr->rx_bytes += ri->iri_len;
 
 		eop = (staterr & E1000_RXD_STAT_EOP) != 0;
 
@@ -752,6 +760,8 @@ em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 		i++;
 	} while (!eop);
 
+	rxr->rx_packets++;
+
 	if (scctx->isc_capenable & IFCAP_RXCSUM)
 		em_receive_checksum(staterr, staterr >> 24, ri);
 
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index b9c6131e6741..22578a3d8655 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -1,8 +1,9 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
+ * Copyright (c) 2001-2024, Intel Corporation
  * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
- * All rights reserved.
+ * Copyright (c) 2024 Kevin Bowling <kbowling@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -329,10 +330,12 @@ static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
 static int	em_get_rs(SYSCTL_HANDLER_ARGS);
 static void	em_print_debug_info(struct e1000_softc *);
 static int 	em_is_valid_ether_addr(u8 *);
+static void	em_newitr(struct e1000_softc *, struct em_rx_queue *,
+    struct tx_ring *, struct rx_ring *);
 static bool	em_automask_tso(if_ctx_t);
 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
 static void	em_add_int_delay_sysctl(struct e1000_softc *, const char *,
-		    const char *, struct em_int_delay_info *, int, int);
+    const char *, struct em_int_delay_info *, int, int);
 /* Management and WOL Support */
 static void	em_init_manageability(struct e1000_softc *);
 static void	em_release_manageability(struct e1000_softc *);
@@ -545,10 +548,19 @@ static int eee_setting = 1;
 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
     "Enable Energy Efficient Ethernet");
 
+/*
+ * AIM: Adaptive Interrupt Moderation
+ * which means that the interrupt rate is varied over time based on the
+ * traffic for that interrupt vector
+ */
+static int em_enable_aim = 1;
+SYSCTL_INT(_hw_em, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &em_enable_aim,
+    0, "Enable adaptive interrupt moderation (1=normal, 2=lowlatency)");
+
 /*
 ** Tuneable Interrupt rate
 */
-static int em_max_interrupt_rate = EM_INTS_PER_SEC;
+static int em_max_interrupt_rate = EM_INTS_DEFAULT;
 SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
     &em_max_interrupt_rate, 0, "Maximum interrupts per second");
 
@@ -832,6 +844,11 @@ em_if_attach_pre(if_ctx_t ctx)
 	    CTLTYPE_INT | CTLFLAG_RW, sc, 0,
 	    em_sysctl_nvm_info, "I", "NVM Information");
 
+	sc->enable_aim = em_enable_aim;
+	SYSCTL_ADD_INT(ctx_list, child, OID_AUTO, "enable_aim",
+	    CTLFLAG_RW, &sc->enable_aim, 0,
+		"Interrupt Moderation (1=normal, 2=lowlatency)");
+
 	SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fw_version",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    em_sysctl_print_fw_version, "A",
@@ -1437,6 +1454,159 @@ em_if_init(if_ctx_t ctx)
 	}
 }
 
+enum itr_latency_target {	/* AIM states kept in rx_ring.rx_nextlatency */
+	itr_latency_disabled = 0,	/* AIM off, or bootstrapping */
+	itr_latency_lowest = 1,		/* programmed as EM_INTS_70K */
+	itr_latency_low = 2,		/* programmed as EM_INTS_20K */
+	itr_latency_bulk = 3		/* programmed as EM_INTS_4K */
+};
+/*********************************************************************
+ *  Pick the next (E)ITR holdoff for a queue from the traffic counted
+ *  on its tx/rx rings since the counters were last reset (AIM), and
+ *  write the register only when it differs from que->itr_setting.
+ *********************************************************************/
+static void
+em_newitr(struct e1000_softc *sc, struct em_rx_queue *que,
+    struct tx_ring *txr, struct rx_ring *rxr)
+{
+	struct e1000_hw *hw = &sc->hw;
+	u32 newitr;
+	u32 bytes = 0;		/* zeroed: a ring without traffic is skipped */
+	u32 bytes_packets = 0;	/* average bytes per packet */
+	u32 packets = 0;
+	u8 nextlatency;
+
+	/* Idle, do nothing */
+	if ((txr->tx_bytes == 0) && (rxr->rx_bytes == 0))
+		return;
+
+	newitr = 0;
+
+	if (sc->enable_aim) {
+		nextlatency = rxr->rx_nextlatency;
+
+		/* Use half default (4K) ITR if sub-gig */
+		if (sc->link_speed != 1000) {
+			newitr = EM_INTS_4K;
+			goto em_set_next_itr;
+		}
+		/* Want at least enough packet buffer for two frames to AIM */
+		if (sc->shared->isc_max_frame_size * 2 > (sc->pba << 10)) {
+			newitr = em_max_interrupt_rate;
+			sc->enable_aim = 0;
+			goto em_set_next_itr;
+		}
+
+		/* Use the larger of each tx/rx figure; idle rings contribute 0 */
+		if (txr->tx_bytes && txr->tx_packets) {
+			bytes = txr->tx_bytes;
+			bytes_packets = txr->tx_bytes/txr->tx_packets;
+			packets = txr->tx_packets;
+		}
+		if (rxr->rx_bytes && rxr->rx_packets) {
+			bytes = max(bytes, rxr->rx_bytes);
+			bytes_packets = max(bytes_packets, rxr->rx_bytes/rxr->rx_packets);
+			packets = max(packets, rxr->rx_packets);
+		}
+
+		/* Latency state machine */
+		switch (nextlatency) {
+		case itr_latency_disabled: /* Bootstrapping */
+			nextlatency = itr_latency_low;
+			break;
+		case itr_latency_lowest: /* 70k ints/s */
+			/* TSO and jumbo frames */
+			if (bytes_packets > 8000)
+				nextlatency = itr_latency_bulk;
+			else if ((packets < 5) && (bytes > 512))
+				nextlatency = itr_latency_low;
+			break;
+		case itr_latency_low: /* 20k ints/s */
+			if (bytes > 10000) {
+				/* Handle TSO */
+				if (bytes_packets > 8000)
+					nextlatency = itr_latency_bulk;
+				else if ((packets < 10) || (bytes_packets > 1200))
+					nextlatency = itr_latency_bulk;
+				else if (packets > 35)
+					nextlatency = itr_latency_lowest;
+			} else if (bytes_packets > 2000) {
+				nextlatency = itr_latency_bulk;
+			} else if (packets < 3 && bytes < 512) {
+				nextlatency = itr_latency_lowest;
+			}
+			break;
+		case itr_latency_bulk: /* 4k ints/s */
+			if (bytes > 25000) {
+				if (packets > 35)
+					nextlatency = itr_latency_low;
+			} else if (bytes < 1500)
+				nextlatency = itr_latency_low;
+			break;
+		default: /* Log the offending state before replacing it */
+			device_printf(sc->dev, "Unexpected newitr transition %d\n",
+			    nextlatency);
+			nextlatency = itr_latency_low;
+			break;
+		}
+
+		/* Trim itr_latency_lowest for default AIM setting */
+		if (sc->enable_aim == 1 && nextlatency == itr_latency_lowest)
+			nextlatency = itr_latency_low;
+
+		/* Request new latency */
+		rxr->rx_nextlatency = nextlatency;
+	} else {
+		/* We may have toggled to AIM disabled */
+		nextlatency = itr_latency_disabled;
+		rxr->rx_nextlatency = nextlatency;
+	}
+
+	/* ITR state machine */
+	switch(nextlatency) {
+	case itr_latency_lowest:
+		newitr = EM_INTS_70K;
+		break;
+	case itr_latency_low:
+		newitr = EM_INTS_20K;
+		break;
+	case itr_latency_bulk:
+		newitr = EM_INTS_4K;
+		break;
+	case itr_latency_disabled:
+	default:
+		newitr = em_max_interrupt_rate;
+		break;
+	}
+
+em_set_next_itr:
+	if (hw->mac.type >= igb_mac_min) {
+		newitr = IGB_INTS_TO_EITR(newitr);
+
+		if (hw->mac.type == e1000_82575)
+			newitr |= newitr << 16;	/* mirror into the upper 16 bits */
+		else
+			newitr |= E1000_EITR_CNT_IGNR;
+
+		if (newitr != que->itr_setting) {
+			que->itr_setting = newitr;
+			E1000_WRITE_REG(hw, E1000_EITR(que->msix), que->itr_setting);
+		}
+	} else {
+		newitr = EM_INTS_TO_ITR(newitr);
+
+		if (newitr != que->itr_setting) {
+			que->itr_setting = newitr;
+			if (hw->mac.type == e1000_82574 && que->msix) {
+				E1000_WRITE_REG(hw,
+				    E1000_EITR_82574(que->msix), que->itr_setting);
+			} else {
+				E1000_WRITE_REG(hw, E1000_ITR, que->itr_setting);
+			}
+		}
+	}
+}
+
 /*********************************************************************
  *
  *  Fast Legacy/MSI Combined Interrupt Service routine
@@ -1446,10 +1616,14 @@ int
 em_intr(void *arg)
 {
 	struct e1000_softc *sc = arg;
+	struct e1000_hw *hw = &sc->hw;
+	struct em_rx_queue *que = &sc->rx_queues[0];
+	struct tx_ring *txr = &sc->tx_queues[0].txr;
+	struct rx_ring *rxr = &que->rxr;
 	if_ctx_t ctx = sc->ctx;
 	u32 reg_icr;
 
-	reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
+	reg_icr = E1000_READ_REG(hw, E1000_ICR);
 
 	/* Hot eject? */
 	if (reg_icr == 0xffffffff)
@@ -1463,7 +1637,7 @@ em_intr(void *arg)
 	 * Starting with the 82571 chip, bit 31 should be used to
 	 * determine whether the interrupt belongs to us.
 	 */
-	if (sc->hw.mac.type >= e1000_82571 &&
+	if (hw->mac.type >= e1000_82571 &&
 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
 		return FILTER_STRAY;
 
@@ -1482,6 +1656,15 @@ em_intr(void *arg)
 	if (reg_icr & E1000_ICR_RXO)
 		sc->rx_overruns++;
 
+	if (hw->mac.type >= e1000_82540)
+		em_newitr(sc, que, txr, rxr);
+
+	/* Reset state */
+	txr->tx_bytes = 0;
+	txr->tx_packets = 0;
+	rxr->rx_bytes = 0;
+	rxr->rx_packets = 0;
+
 	return (FILTER_SCHEDULE_THREAD);
 }
 
@@ -1534,9 +1717,20 @@ static int
 em_msix_que(void *arg)
 {
 	struct em_rx_queue *que = arg;
+	struct e1000_softc *sc = que->sc;
+	struct tx_ring *txr = &sc->tx_queues[que->msix].txr;
+	struct rx_ring *rxr = &que->rxr;
 
 	++que->irqs;
 
+	em_newitr(sc, que, txr, rxr);
+
+	/* Reset state */
+	txr->tx_bytes = 0;
+	txr->tx_packets = 0;
+	rxr->rx_bytes = 0;
+	rxr->rx_packets = 0;
+
 	return (FILTER_SCHEDULE_THREAD);
 }
 
@@ -2882,6 +3076,9 @@ em_reset(if_ctx_t ctx)
 	if (hw->mac.type >= igb_mac_min)
 		igb_init_dmac(sc, pba);
 
+	/* Save the final PBA off if it needs to be used elsewhere i.e. AIM */
+	sc->pba = pba;
+
 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
 	e1000_get_phy_info(hw);
 	e1000_check_for_link(hw);
@@ -3741,6 +3938,7 @@ em_if_intr_enable(if_ctx_t ctx)
 		E1000_WRITE_REG(hw, EM_EIAC, sc->ims);
 		ims_mask |= sc->ims;
 	}
+
 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
 	E1000_WRITE_FLUSH(hw);
 }
@@ -4410,6 +4608,57 @@ em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
 	return (sysctl_handle_int(oidp, &val, 0, req));
 }
 
+/*
+ * Per-queue sysctl handler: report the interrupt rate implied by the
+ * holdoff value currently held in the queue's ITR/EITR register.
+ * oid_arg1 is the queue; a non-zero oid_arg2 marks it as an
+ * em_tx_queue, otherwise it is an em_rx_queue.  The hardware is only
+ * read here, never written.
+ */
+static int
+em_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
+{
+	struct e1000_hw *hw;
+	u32 idx, vec, regval, holdoff, rate;
+
+	/* Resolve the register index and vector for the queue flavour. */
+	if (oidp->oid_arg2) {
+		struct em_tx_queue *txq = oidp->oid_arg1;
+
+		hw = &txq->sc->hw;
+		idx = txq->me;
+		vec = txq->msix;
+	} else {
+		struct em_rx_queue *rxq = oidp->oid_arg1;
+
+		hw = &rxq->sc->hw;
+		idx = rxq->msix;
+		vec = rxq->msix;
+	}
+
+	/* igb-class: EITR; 82574 with a vector: EITR_82574; otherwise ITR. */
+	if (hw->mac.type >= igb_mac_min)
+		regval = E1000_READ_REG(hw, E1000_EITR(idx));
+	else if (hw->mac.type == e1000_82574 && vec)
+		regval = E1000_READ_REG(hw, E1000_EITR_82574(idx));
+	else
+		regval = E1000_READ_REG(hw, E1000_ITR);
+
+	/* A zero holdoff is reported as rate 0 instead of dividing by it. */
+	rate = 0;
+	if (hw->mac.type < igb_mac_min) {
+		if (regval > 0)
+			rate = EM_INTS_TO_ITR(regval);
+	} else {
+		holdoff = regval & IGB_QVECTOR_MASK;
+		if (holdoff > 0)
+			rate = IGB_INTS_TO_EITR(holdoff);
+	}
+
+	/* Hand the value to the generic int handler and pass on its result. */
+	return (sysctl_handle_int(oidp, &rate, 0, req));
+}
+
 /*
  * Add sysctl variables, one per statistic, to the system.
  */
@@ -4466,6 +4715,11 @@ em_add_hw_stats(struct e1000_softc *sc)
 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+		    CTLTYPE_UINT | CTLFLAG_RD, tx_que,
+		    true, em_sysctl_interrupt_rate_handler,
+		    "IU", "Interrupt Rate");
+
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 		    CTLTYPE_UINT | CTLFLAG_RD, sc,
 		    E1000_TDH(txr->me), em_sysctl_reg_handler, "IU",
@@ -4486,6 +4740,11 @@ em_add_hw_stats(struct e1000_softc *sc)
 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+		    CTLTYPE_UINT | CTLFLAG_RD, rx_que,
+		    false, em_sysctl_interrupt_rate_handler,
+		    "IU", "Interrupt Rate");
+
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
 		    CTLTYPE_UINT | CTLFLAG_RD, sc,
 		    E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU",
diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h
index 7219dc57c333..52bfed0f9a42 100644
--- a/sys/dev/e1000/if_em.h
+++ b/sys/dev/e1000/if_em.h
@@ -1,8 +1,9 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
+ * Copyright (c) 2001-2024, Intel Corporation
  * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
- * All rights reserved.
+ * Copyright (c) 2024 Kevin Bowling <kbowling@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -243,16 +244,19 @@
 /* Support AutoMediaDetect for Marvell M88 PHY in i354 */
 #define IGB_MEDIA_RESET		(1 << 0)
 
-/* Define the starting Interrupt rate per Queue */
-#define EM_INTS_PER_SEC		8000
+/* Define the interrupt rates and ITR helpers */
+#define EM_INTS_4K		4000
+#define EM_INTS_20K		20000
+#define EM_INTS_70K		70000
+#define EM_INTS_DEFAULT		8000
 #define EM_INTS_MULTIPLIER	256
 #define EM_ITR_DIVIDEND		1000000000
 #define EM_INTS_TO_ITR(i)	(EM_ITR_DIVIDEND/(i * EM_INTS_MULTIPLIER))
-#define IGB_ITR_DIVIDEND	1000000
-#define IGB_ITR_SHIFT		2
+#define IGB_EITR_DIVIDEND	1000000
+#define IGB_EITR_SHIFT		2
 #define IGB_QVECTOR_MASK	0x7FFC
-#define IGB_INTS_TO_EITR(i)	(((IGB_ITR_DIVIDEND/i) & IGB_QVECTOR_MASK) << \
-				    IGB_ITR_SHIFT)
+#define IGB_INTS_TO_EITR(i)	(((IGB_EITR_DIVIDEND/i) & IGB_QVECTOR_MASK) << \
+				    IGB_EITR_SHIFT)
 
 #define IGB_LINK_ITR		2000
 #define I210_LINK_DELAY		1000
@@ -390,7 +394,11 @@ struct tx_ring {
 	/* Interrupt resources */
 	void			*tag;
 	struct resource		*res;
+
+	/* Soft stats */
 	unsigned long		tx_irq;
+	unsigned long		tx_packets;
+	unsigned long		tx_bytes;
 
 	/* Saved csum offloading context information */
 	int			csum_flags;
@@ -426,6 +434,9 @@ struct rx_ring {
 	unsigned long		rx_discarded;
 	unsigned long		rx_packets;
 	unsigned long		rx_bytes;
+
+	/* Next requested ITR latency */
+	u8			rx_nextlatency;
 };
 
 struct em_tx_queue {
@@ -441,6 +452,7 @@ struct em_rx_queue {
 	u32			me;
 	u32			msix;
 	u32			eims;
+	u32			itr_setting;
 	struct rx_ring		rxr;
 	u64			irqs;
 	struct if_irq		que_irq;
@@ -489,6 +501,7 @@ struct e1000_softc {
 
 	u32			rx_mbuf_sz;
 
+	int			enable_aim;
 	/* Management and WOL features */
 	u32			wol;
 	bool			has_manage;
@@ -512,6 +525,7 @@ struct e1000_softc {
 	u16			link_duplex;
 	u32			smartspeed;
 	u32			dmac;
+	u32			pba;
 	int			link_mask;
 	int			tso_automasked;
 
diff --git a/sys/dev/e1000/igb_txrx.c b/sys/dev/e1000/igb_txrx.c
index 2819150acba6..82cbb37ce4cd 100644
--- a/sys/dev/e1000/igb_txrx.c
+++ b/sys/dev/e1000/igb_txrx.c
@@ -292,6 +292,10 @@ igb_isc_txd_encap(void *arg, if_pkt_info_t pi)
 	txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | txd_flags);
 	pi->ipi_new_pidx = i;
 
+	/* Sent data accounting for AIM */
+	txr->tx_bytes += pi->ipi_len;
+	++txr->tx_packets;
+
 	return (0);
 }