svn commit: r289476 - head/sys/mips/atheros
Adrian Chadd
adrian at FreeBSD.org
Sun Oct 18 00:59:30 UTC 2015
Author: adrian
Date: Sun Oct 18 00:59:28 2015
New Revision: 289476
URL: https://svnweb.freebsd.org/changeset/base/289476
Log:
if_arge: fix up TX workaround; add TX/RX requirements for busdma; add stats
The early ethernet MACs (I think AR71xx and AR913x) require 4-byte
alignment for all packets on both TX and RX.
The later MACs have started relaxing the requirements.
For now, the 1-byte TX and 1-byte RX alignment requirements are only for
the QCA955x SoCs. I'll add in the relaxed requirements as I review the
datasheets and do testing.
* Add a hardware flags field and 1-byte / 4-byte TX/RX alignment.
* .. defaulting to 4-byte TX and 4-byte RX alignment.
* Only enforce the TX alignment fixup if the hardware requires a 4-byte
TX alignment. This avoids a call to m_defrag().
* Add counters for various situations for further debugging.
* Set the 1-byte and 4-byte busdma alignment requirement when
the tag is created.
This improves the straight bridging performance from 130mbit/sec
to 180mbit/sec, purely by removing the need for TX path bounce buffers.
The main performance issue is the RX alignment requirement and any RX
bounce buffering that's occurring. (In a local test, removing the RX
fixup path and just aligning buffers raises the performance to above
400mbit/sec.)
In theory it's a no-op for SoCs before the QCA955x.
Tested:
* QCA9558 SoC in AP135 board, using software bridging between arge0/arge1.
Modified:
head/sys/mips/atheros/if_arge.c
head/sys/mips/atheros/if_argevar.h
Modified: head/sys/mips/atheros/if_arge.c
==============================================================================
--- head/sys/mips/atheros/if_arge.c Sat Oct 17 22:41:30 2015 (r289475)
+++ head/sys/mips/atheros/if_arge.c Sun Oct 18 00:59:28 2015 (r289476)
@@ -298,6 +298,29 @@ arge_attach_sysctl(device_t dev)
"tx_pkts_unaligned", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned,
0, "number of TX unaligned packets");
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+ "tx_pkts_unaligned_start", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_start,
+ 0, "number of TX unaligned packets (start)");
+
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+ "tx_pkts_unaligned_len", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_len,
+ 0, "number of TX unaligned packets (len)");
+
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+ "tx_pkts_nosegs", CTLFLAG_RW, &sc->stats.tx_pkts_nosegs,
+ 0, "number of TX packets fail with no ring slots avail");
+
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+ "intr_stray_filter", CTLFLAG_RW, &sc->stats.intr_stray,
+ 0, "number of stray interrupts (filter)");
+
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+ "intr_stray_intr", CTLFLAG_RW, &sc->stats.intr_stray2,
+ 0, "number of stray interrupts (intr)");
+
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+ "intr_ok", CTLFLAG_RW, &sc->stats.intr_ok,
+ 0, "number of OK interrupts");
#ifdef ARGE_DEBUG
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_prod",
CTLFLAG_RW, &sc->arge_cdata.arge_tx_prod, 0, "");
@@ -627,6 +650,22 @@ arge_attach(device_t dev)
}
/*
+ * Hardware workarounds.
+ */
+ switch (ar71xx_soc) {
+ case AR71XX_SOC_QCA9556:
+ case AR71XX_SOC_QCA9558:
+ /* Arbitrary alignment */
+ sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE;
+ sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE;
+ break;
+ default:
+ sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE;
+ sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE;
+ break;
+ }
+
+ /*
* Some units (eg the TP-Link WR-1043ND) do not have a convenient
* EEPROM location to read the ethernet MAC address from.
* OpenWRT simply snaffles it from a fixed location.
@@ -825,6 +864,9 @@ arge_attach(device_t dev)
ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG0,
FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT);
+ /*
+ * SoC specific bits.
+ */
switch (ar71xx_soc) {
case AR71XX_SOC_AR7240:
case AR71XX_SOC_AR7241:
@@ -1351,24 +1393,35 @@ arge_init_locked(struct arge_softc *sc)
* Return whether the mbuf chain is correctly aligned
* for the arge TX engine.
*
- * The TX engine requires each fragment to be aligned to a
- * 4 byte boundary and the size of each fragment except
- * the last to be a multiple of 4 bytes.
+ * All the MACs have a length requirement: any non-final
+ * fragment (ie, descriptor with MORE bit set) needs to have
+ * a length divisible by 4.
*
- * XXX TODO: I believe this is only a bug on the AR71xx and
- * AR913x MACs. The later MACs (AR724x and later) does not
- * need this workaround.
+ * The AR71xx, AR913x require the start address also be
+ * DWORD aligned. The later MACs don't.
*/
static int
-arge_mbuf_chain_is_tx_aligned(struct mbuf *m0)
+arge_mbuf_chain_is_tx_aligned(struct arge_softc *sc, struct mbuf *m0)
{
struct mbuf *m;
for (m = m0; m != NULL; m = m->m_next) {
- if((mtod(m, intptr_t) & 3) != 0)
+ /*
+ * Only do this for chips that require it.
+ */
+ if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) &&
+ (mtod(m, intptr_t) & 3) != 0) {
+ sc->stats.tx_pkts_unaligned_start++;
return 0;
- if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0))
+ }
+
+ /*
+ * All chips have this requirement for length.
+ */
+ if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0)) {
+ sc->stats.tx_pkts_unaligned_len++;
return 0;
+ }
}
return 1;
}
@@ -1389,15 +1442,10 @@ arge_encap(struct arge_softc *sc, struct
ARGE_LOCK_ASSERT(sc);
/*
- * Fix mbuf chain, all fragments should be 4 bytes aligned and
- * even 4 bytes
- *
- * XXX TODO: I believe this is only a bug on the AR71xx and
- * AR913x MACs. The later MACs (AR724x and later) does not
- * need this workaround.
+ * Fix mbuf chain based on hardware alignment constraints.
*/
m = *m_head;
- if (! arge_mbuf_chain_is_tx_aligned(m)) {
+ if (! arge_mbuf_chain_is_tx_aligned(sc, m)) {
sc->stats.tx_pkts_unaligned++;
m = m_defrag(*m_head, M_NOWAIT);
if (m == NULL) {
@@ -1427,6 +1475,7 @@ arge_encap(struct arge_softc *sc, struct
/* Check number of available descriptors. */
if (sc->arge_cdata.arge_tx_cnt + nsegs >= (ARGE_TX_RING_COUNT - 1)) {
bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap);
+ sc->stats.tx_pkts_nosegs++;
return (ENOBUFS);
}
@@ -1444,7 +1493,9 @@ arge_encap(struct arge_softc *sc, struct
desc = &sc->arge_rdata.arge_tx_ring[prod];
desc->packet_ctrl = ARGE_DMASIZE(txsegs[i].ds_len);
- if (txsegs[i].ds_addr & 3)
+ /* XXX Note: only relevant for older MACs; but check length! */
+ if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) &&
+ (txsegs[i].ds_addr & 3))
panic("TX packet address unaligned\n");
desc->packet_addr = txsegs[i].ds_addr;
@@ -1715,6 +1766,16 @@ arge_dma_alloc(struct arge_softc *sc)
struct arge_txdesc *txd;
struct arge_rxdesc *rxd;
int error, i;
+ int arge_tx_align, arge_rx_align;
+
+ /* Assume 4 byte alignment by default */
+ arge_tx_align = 4;
+ arge_rx_align = 4;
+
+ if (sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE)
+ arge_tx_align = 1;
+ if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE)
+ arge_rx_align = 1;
/* Create parent DMA tag. */
error = bus_dma_tag_create(
@@ -1775,7 +1836,7 @@ arge_dma_alloc(struct arge_softc *sc)
/* Create tag for Tx buffers. */
error = bus_dma_tag_create(
sc->arge_cdata.arge_parent_tag, /* parent */
- sizeof(uint32_t), 0, /* alignment, boundary */
+ arge_tx_align, 0, /* alignment, boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
@@ -1793,7 +1854,7 @@ arge_dma_alloc(struct arge_softc *sc)
/* Create tag for Rx buffers. */
error = bus_dma_tag_create(
sc->arge_cdata.arge_parent_tag, /* parent */
- ARGE_RX_ALIGN, 0, /* alignment, boundary */
+ arge_rx_align, 0, /* alignment, boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
@@ -2108,6 +2169,11 @@ arge_newbuf(struct arge_softc *sc, int i
if (m == NULL)
return (ENOBUFS);
m->m_len = m->m_pkthdr.len = MCLBYTES;
+
+ /*
+ * Add extra space to "adjust" (copy) the packet back to be aligned
+ * for purposes of IPv4/IPv6 header contents.
+ */
m_adj(m, sizeof(uint64_t));
if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag,
@@ -2126,7 +2192,8 @@ arge_newbuf(struct arge_softc *sc, int i
sc->arge_cdata.arge_rx_sparemap = map;
rxd->rx_m = m;
desc = rxd->desc;
- if (segs[0].ds_addr & 3)
+ if ((sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) &&
+ segs[0].ds_addr & 3)
panic("RX packet address unaligned");
desc->packet_addr = segs[0].ds_addr;
desc->packet_ctrl = ARGE_DESC_EMPTY | ARGE_DMASIZE(segs[0].ds_len);
@@ -2331,10 +2398,12 @@ arge_intr_filter(void *arg)
if (status & DMA_INTR_ALL) {
sc->arge_intr_status |= status;
ARGE_WRITE(sc, AR71XX_DMA_INTR, 0);
+ sc->stats.intr_ok++;
return (FILTER_SCHEDULE_THREAD);
}
sc->arge_intr_status = 0;
+ sc->stats.intr_stray++;
return (FILTER_STRAY);
}
@@ -2355,8 +2424,10 @@ arge_intr(void *arg)
/*
* Is it our interrupt at all?
*/
- if (status == 0)
+ if (status == 0) {
+ sc->stats.intr_stray2++;
return;
+ }
if (status & DMA_INTR_RX_BUS_ERROR) {
ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_BUS_ERROR);
Modified: head/sys/mips/atheros/if_argevar.h
==============================================================================
--- head/sys/mips/atheros/if_argevar.h Sat Oct 17 22:41:30 2015 (r289475)
+++ head/sys/mips/atheros/if_argevar.h Sun Oct 18 00:59:28 2015 (r289476)
@@ -37,7 +37,10 @@
#define ARGE_TX_DMA_SIZE ARGE_TX_RING_COUNT * sizeof(struct arge_desc)
#define ARGE_MAXFRAGS 8
#define ARGE_RING_ALIGN sizeof(struct arge_desc)
-#define ARGE_RX_ALIGN sizeof(uint32_t)
+#define ARGE_RX_ALIGN_4BYTE sizeof(uint32_t)
+#define ARGE_RX_ALIGN_1BYTE sizeof(char)
+#define ARGE_TX_ALIGN_4BYTE sizeof(uint32_t)
+#define ARGE_TX_ALIGN_1BYTE sizeof(char)
#define ARGE_MAXFRAGS 8
#define ARGE_TX_RING_ADDR(sc, i) \
((sc)->arge_rdata.arge_tx_ring_paddr + sizeof(struct arge_desc) * (i))
@@ -149,6 +152,22 @@ struct arge_pll_data {
uint32_t pll_1000;
};
+/*
+ * Hardware specific behaviours.
+ */
+
+/*
+ * Older chips support 4 byte only transmit and receive
+ * addresses.
+ *
+ * Later chips support arbitrary TX and later later,
+ * arbitrary RX addresses.
+ */
+#define ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE 0x00000001
+#define ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE 0x00000002
+#define ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE 0x00000004
+#define ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE 0x00000008
+
struct arge_softc {
struct ifnet *arge_ifp; /* interface info */
device_t arge_dev;
@@ -180,13 +199,20 @@ struct arge_softc {
uint32_t arge_intr_status;
int arge_mac_unit;
int arge_if_flags;
+ uint32_t arge_hw_flags;
uint32_t arge_debug;
uint32_t arge_mdiofreq;
struct {
uint32_t tx_pkts_unaligned;
+ uint32_t tx_pkts_unaligned_start;
+ uint32_t tx_pkts_unaligned_len;
+ uint32_t tx_pkts_nosegs;
uint32_t tx_pkts_aligned;
uint32_t rx_overflow;
uint32_t tx_underflow;
+ uint32_t intr_stray;
+ uint32_t intr_stray2;
+ uint32_t intr_ok;
} stats;
};
More information about the svn-src-all
mailing list