svn commit: r240718 - stable/9/sys/dev/ixgbe
Scott Long
scottl at FreeBSD.org
Thu Sep 20 01:23:55 UTC 2012
Author: scottl
Date: Thu Sep 20 01:23:54 2012
New Revision: 240718
URL: http://svn.freebsd.org/changeset/base/240718
Log:
Sync the ixgbe driver from HEAD to stable/9
r236627 - Fix driver deadlock due to OACTIVE flag
r236729 - Fix prefetch programming typo
r239940 - Improve small RX packet performance
r240155 - Fix missing braces in PHY configuration
r240366 - Remove a prefetch directive that hurts performance
Approved by: jfv
Obtained from: Netflix, inc.
Modified:
stable/9/sys/dev/ixgbe/ixgbe.c
stable/9/sys/dev/ixgbe/ixgbe.h
stable/9/sys/dev/ixgbe/ixgbe_osdep.h
Directory Properties:
stable/9/sys/dev/ixgbe/ (props changed)
Modified: stable/9/sys/dev/ixgbe/ixgbe.c
==============================================================================
--- stable/9/sys/dev/ixgbe/ixgbe.c Thu Sep 20 00:51:09 2012 (r240717)
+++ stable/9/sys/dev/ixgbe/ixgbe.c Thu Sep 20 01:23:54 2012 (r240718)
@@ -1145,7 +1145,7 @@ ixgbe_init_locked(struct adapter *adapte
* from the Intel linux driver 3.8.21.
* Prefetching enables tx line rate even with 1 queue.
*/
- txdctl |= (16 << 0) | (1 << 8);
+ txdctl |= (32 << 0) | (1 << 8);
IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
}
@@ -1390,7 +1390,7 @@ ixgbe_handle_que(void *context, int pend
ixgbe_start_locked(txr, ifp);
#endif
IXGBE_TX_UNLOCK(txr);
- if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
+ if (more) {
taskqueue_enqueue(que->tq, &que->que_task);
return;
}
@@ -3698,21 +3698,30 @@ no_split:
mp = rxbuf->m_pack;
mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
- /* Get the memory mapping */
- error = bus_dmamap_load_mbuf_sg(rxr->ptag,
- rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
- if (error != 0) {
- printf("Refresh mbufs: payload dmamap load"
- " failure - %d\n", error);
- m_free(mp);
- rxbuf->m_pack = NULL;
- goto update;
+
+ /* If we're dealing with an mbuf that was copied rather
+ * than replaced, there's no need to go through busdma.
+ */
+ if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
+ /* Get the memory mapping */
+ error = bus_dmamap_load_mbuf_sg(rxr->ptag,
+ rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
+ if (error != 0) {
+ printf("Refresh mbufs: payload dmamap load"
+ " failure - %d\n", error);
+ m_free(mp);
+ rxbuf->m_pack = NULL;
+ goto update;
+ }
+ rxbuf->m_pack = mp;
+ bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
+ BUS_DMASYNC_PREREAD);
+ rxbuf->paddr = rxr->rx_base[i].read.pkt_addr =
+ htole64(pseg[0].ds_addr);
+ } else {
+ rxr->rx_base[i].read.pkt_addr = rxbuf->paddr;
+ rxbuf->flags &= ~IXGBE_RX_COPY;
}
- rxbuf->m_pack = mp;
- bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
- BUS_DMASYNC_PREREAD);
- rxr->rx_base[i].read.pkt_addr =
- htole64(pseg[0].ds_addr);
refreshed = TRUE;
/* Next is precalculated */
@@ -4025,6 +4034,7 @@ skip_head:
rxr->next_to_refresh = 0;
rxr->lro_enabled = FALSE;
rxr->rx_split_packets = 0;
+ rxr->rx_copies = 0;
rxr->rx_bytes = 0;
rxr->discard = FALSE;
rxr->vtag_strip = FALSE;
@@ -4580,14 +4590,36 @@ ixgbe_rxeof(struct ix_queue *que, int co
** that determines what we are
*/
sendmp = rbuf->fmp;
- rbuf->m_pack = rbuf->fmp = NULL;
if (sendmp != NULL) { /* secondary frag */
+ rbuf->m_pack = rbuf->fmp = NULL;
mp->m_flags &= ~M_PKTHDR;
sendmp->m_pkthdr.len += mp->m_len;
} else {
+ /*
+ * Optimize. This might be a small packet,
+ * maybe just a TCP ACK. Do a fast copy that
+ * is cache aligned into a new mbuf, and
+ * leave the old mbuf+cluster for re-use.
+ */
+ if (eop && plen <= IXGBE_RX_COPY_LEN) {
+ sendmp = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (sendmp != NULL) {
+ sendmp->m_data +=
+ IXGBE_RX_COPY_ALIGN;
+ ixgbe_bcopy(mp->m_data,
+ sendmp->m_data, plen);
+ sendmp->m_len = plen;
+ rxr->rx_copies++;
+ rbuf->flags |= IXGBE_RX_COPY;
+ }
+ }
+ if (sendmp == NULL) {
+ rbuf->m_pack = rbuf->fmp = NULL;
+ sendmp = mp;
+ }
+
/* first desc of a non-ps chain */
- sendmp = mp;
sendmp->m_flags |= M_PKTHDR;
sendmp->m_pkthdr.len = mp->m_len;
if (staterr & IXGBE_RXD_STAT_VP) {
@@ -5438,6 +5470,9 @@ ixgbe_add_hw_stats(struct adapter *adapt
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
CTLFLAG_RD, &rxr->rx_bytes,
"Queue Bytes Received");
+ SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
+ CTLFLAG_RD, &rxr->rx_copies,
+ "Copied RX Frames");
SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
CTLFLAG_RD, &lro->lro_queued, 0,
"LRO Queued");
Modified: stable/9/sys/dev/ixgbe/ixgbe.h
==============================================================================
--- stable/9/sys/dev/ixgbe/ixgbe.h Thu Sep 20 00:51:09 2012 (r240717)
+++ stable/9/sys/dev/ixgbe/ixgbe.h Thu Sep 20 01:23:54 2012 (r240718)
@@ -154,6 +154,19 @@
#define IXGBE_FC_HI 0x20000
#define IXGBE_FC_LO 0x10000
+/*
+ * Used for optimizing small rx mbufs. Effort is made to keep the copy
+ * small and aligned for the CPU L1 cache.
+ *
+ * MHLEN is typically 168 bytes, giving us 8-byte alignment. Getting
+ * 32 byte alignment needed for the fast bcopy results in 8 bytes being
+ * wasted. Getting 64 byte alignment, which _should_ be ideal for
+ * modern Intel CPUs, results in 40 bytes wasted and a significant drop
+ * in observed efficiency of the optimization, 97.9% -> 81.8%.
+ */
+#define IXGBE_RX_COPY_LEN 160
+#define IXGBE_RX_COPY_ALIGN (MHLEN - IXGBE_RX_COPY_LEN)
+
/* Keep older OS drivers building... */
#if !defined(SYSCTL_ADD_UQUAD)
#define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD
@@ -245,6 +258,9 @@ struct ixgbe_rx_buf {
struct mbuf *fmp;
bus_dmamap_t hmap;
bus_dmamap_t pmap;
+ u_int flags;
+#define IXGBE_RX_COPY 0x01
+ uint64_t paddr;
};
/*
@@ -339,6 +355,7 @@ struct rx_ring {
/* Soft stats */
u64 rx_irq;
u64 rx_split_packets;
+ u64 rx_copies;
u64 rx_packets;
u64 rx_bytes;
u64 rx_discarded;
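
As a worked example of the comment above, assuming MHLEN is 168 bytes (typical for amd64 at the time; the value is machine dependent): the 160-byte copy limit leaves 8 bytes of the header mbuf unused to reach 32-byte alignment, while the 128-byte limit that 64-byte alignment would impose leaves 40 bytes unused. A throwaway program that prints those numbers:

/* Worked numbers only; 168 is an assumed MHLEN, not a definition. */
#include <stdio.h>

int
main(void)
{
        const int mhlen = 168;                  /* assumed MHLEN */
        const int copy_len_32aligned = 160;     /* IXGBE_RX_COPY_LEN as committed */
        const int copy_len_64aligned = 128;     /* what 64-byte alignment would allow */

        printf("32-byte aligned: %3d usable, %2d wasted\n",
            copy_len_32aligned, mhlen - copy_len_32aligned);
        printf("64-byte aligned: %3d usable, %2d wasted\n",
            copy_len_64aligned, mhlen - copy_len_64aligned);
        return (0);
}
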
Modified: stable/9/sys/dev/ixgbe/ixgbe_osdep.h
==============================================================================
--- stable/9/sys/dev/ixgbe/ixgbe_osdep.h Thu Sep 20 00:51:09 2012 (r240717)
+++ stable/9/sys/dev/ixgbe/ixgbe_osdep.h Thu Sep 20 01:23:54 2012 (r240718)
@@ -143,6 +143,25 @@ void prefetch(void *x)
#define prefetch(x)
#endif
+/*
+ * Optimized bcopy thanks to Luigi Rizzo's investigative work. Assumes
+ * non-overlapping regions and 32-byte padding on both src and dst.
+ */
+static __inline int
+ixgbe_bcopy(void *_src, void *_dst, int l)
+{
+ uint64_t *src = _src;
+ uint64_t *dst = _dst;
+
+ for (; l > 0; l -= 32) {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ }
+ return (0);
+}
+
struct ixgbe_osdep
{
bus_space_tag_t mem_bus_space_tag;
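
One property of ixgbe_bcopy() worth spelling out: the loop advances in whole 32-byte strides, so a length that is not a multiple of 32 is effectively rounded up to the next multiple, which is what the 32-byte padding requirement in the comment is about. Below is a throwaway userland check; the routine is duplicated (with __inline spelled inline) only so the example builds on its own.

/*
 * Stand-alone check of the 32-byte stride: asking for 60 bytes copies
 * 64, so both buffers are sized to the next 32-byte multiple.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static inline int
ixgbe_bcopy(void *_src, void *_dst, int l)
{
        uint64_t *src = _src;
        uint64_t *dst = _dst;

        for (; l > 0; l -= 32) {
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
        }
        return (0);
}

int
main(void)
{
        uint64_t src[8], dst[8];        /* 64 bytes each: 60-byte payload + padding */

        memset(src, 0xab, sizeof(src));
        memset(dst, 0, sizeof(dst));
        ixgbe_bcopy(src, dst, 60);      /* copies 64 bytes, not 60 */
        printf("dst[7] = %#jx\n", (uintmax_t)dst[7]);
        return (0);
}
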