svn commit: r230572 - in head/sys/dev: ixgbe netmap
Luigi Rizzo
luigi at FreeBSD.org
Thu Jan 26 09:55:16 UTC 2012
Author: luigi
Date: Thu Jan 26 09:55:16 2012
New Revision: 230572
URL: http://svn.freebsd.org/changeset/base/230572
Log:
ixgbe changes:
- remove experimental code for disabling CRC
- use the correct constant for conversion between interrupt rate
and EITR values (the previous values were off by a factor of 2)
- make dev.ix.N.queueM.interrupt_rate a RW sysctl variable.
Changing individual values affects the queue immediately,
and propagates to all interfaces at the next reinit.
- add a read-only dev.ix.N.queueM.irqs sysctl to export the actual
interrupt counts
Netmap-related changes for ixgbe:
- use the "new" format for TX descriptors in netmap mode.
- pass interrupt mitigation delays to the user process doing poll()
on a netmap file descriptor.
On the RX side this means we will not check the ring more than once
per interrupt. This gives the process a chance to sleep and process
packets in larger batches, thus reducing CPU usage.
On the TX side we take this even further: completed transmissions are
reclaimed every half ring even if the NIC interrupts more often.
This saves even more CPU without any additional tx delays.
Generic Netmap-related changes:
- align the netmap_kring to cache lines so that there is no false sharing
(possibly useful for multiqueue NICs and MSIX interrupts, which are
handled by different cores). It's a minor improvement but it does not
cost anything.
Reviewed by: Jack Vogel
Approved by: Jack Vogel
Modified:
head/sys/dev/ixgbe/ixgbe.c
head/sys/dev/netmap/ixgbe_netmap.h
head/sys/dev/netmap/netmap.c
head/sys/dev/netmap/netmap_kern.h
Modified: head/sys/dev/ixgbe/ixgbe.c
==============================================================================
--- head/sys/dev/ixgbe/ixgbe.c Thu Jan 26 09:45:14 2012 (r230571)
+++ head/sys/dev/ixgbe/ixgbe.c Thu Jan 26 09:55:16 2012 (r230572)
@@ -232,7 +232,7 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
-static int ixgbe_max_interrupt_rate = (8000000 / IXGBE_LOW_LATENCY);
+static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
/* How many packets rxeof tries to clean at a time */
@@ -3385,22 +3385,41 @@ ixgbe_txeof(struct tx_ring *txr)
#ifdef DEV_NETMAP
if (ifp->if_capenable & IFCAP_NETMAP) {
struct netmap_adapter *na = NA(ifp);
+ struct netmap_kring *kring = &na->tx_rings[txr->me];
+ tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
+
+ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+ BUS_DMASYNC_POSTREAD);
/*
* In netmap mode, all the work is done in the context
* of the client thread. Interrupt handlers only wake up
* clients, which may be sleeping on individual rings
* or on a global resource for all rings.
+ * To implement tx interrupt mitigation, we wake up the client
+ * thread roughly every half ring, even if the NIC interrupts
+ * more frequently. This is implemented as follows:
+ * - ixgbe_txsync() sets kring->nr_kflags with the index of
+ * the slot that should wake up the thread (nkr_num_slots
+ * means the user thread should not be woken up);
+ * - the driver ignores tx interrupts unless netmap_mitigate=0
+ * or the slot has the DD bit set.
+ *
* When the driver has separate locks, we need to
* release and re-acquire txlock to avoid deadlocks.
* XXX see if we can find a better way.
*/
- selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
- IXGBE_TX_UNLOCK(txr);
- IXGBE_CORE_LOCK(adapter);
- selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
- IXGBE_CORE_UNLOCK(adapter);
- IXGBE_TX_LOCK(txr);
+ if (!netmap_mitigate ||
+ (kring->nr_kflags < kring->nkr_num_slots &&
+ tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
+ kring->nr_kflags = kring->nkr_num_slots;
+ selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+ IXGBE_TX_UNLOCK(txr);
+ IXGBE_CORE_LOCK(adapter);
+ selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+ IXGBE_CORE_UNLOCK(adapter);
+ IXGBE_TX_LOCK(txr);
+ }
return FALSE;
}
#endif /* DEV_NETMAP */
@@ -3928,21 +3947,6 @@ skip_head:
lro->ifp = adapter->ifp;
}
-#ifdef DEV_NETMAP1 /* XXX experimental CRC strip */
- {
- struct ixgbe_hw *hw = &adapter->hw;
- u32 rdrxctl;
-
- rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
- rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
- if (slot)
- rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
- else
- rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
- rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
- IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
- }
-#endif /* DEV_NETMAP1 */
IXGBE_RX_UNLOCK(rxr);
return (0);
@@ -4022,12 +4026,6 @@ ixgbe_initialize_receive_units(struct ad
hlreg |= IXGBE_HLREG0_JUMBOEN;
else
hlreg &= ~IXGBE_HLREG0_JUMBOEN;
-#ifdef DEV_NETMAP1 /* XXX experimental CRCSTRIP */
- if (ifp->if_capenable & IFCAP_NETMAP)
- hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
- else
- hlreg |= IXGBE_HLREG0_RXCRCSTRP;
-#endif /* DEV_NETMAP1 */
IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
@@ -4297,11 +4295,14 @@ ixgbe_rxeof(struct ix_queue *que, int co
#ifdef DEV_NETMAP
if (ifp->if_capenable & IFCAP_NETMAP) {
/*
- * Same as the txeof routine, only wakeup clients
- * and make sure there are no deadlocks.
+ * Same as the txeof routine: only wakeup clients on intr.
+ * NKR_PENDINTR in nr_kflags is used to implement interrupt
+ * mitigation (ixgbe_rxsync() will not look for new packets
+ * unless NKR_PENDINTR is set).
*/
struct netmap_adapter *na = NA(ifp);
+ na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
IXGBE_RX_UNLOCK(rxr);
IXGBE_CORE_LOCK(adapter);
@@ -4830,7 +4831,7 @@ ixgbe_configure_ivars(struct adapter *ad
u32 newitr;
if (ixgbe_max_interrupt_rate > 0)
- newitr = (8000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
+ newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
else
newitr = 0;
@@ -5193,12 +5194,21 @@ ixgbe_sysctl_interrupt_rate_handler(SYSC
reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
usec = ((reg & 0x0FF8) >> 3);
if (usec > 0)
- rate = 1000000 / usec;
+ rate = 500000 / usec;
else
rate = 0;
error = sysctl_handle_int(oidp, &rate, 0, req);
if (error || !req->newptr)
return error;
+ reg &= ~0xfff; /* default, no limitation */
+ ixgbe_max_interrupt_rate = 0;
+ if (rate > 0 && rate < 500000) {
+ if (rate < 1000)
+ rate = 1000;
+ ixgbe_max_interrupt_rate = rate;
+ reg |= ((4000000/rate) & 0xff8 );
+ }
+ IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
return 0;
}
@@ -5252,10 +5262,13 @@ ixgbe_add_hw_stats(struct adapter *adapt
queue_list = SYSCTL_CHILDREN(queue_node);
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
- CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
+ CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
sizeof(&adapter->queues[i]),
ixgbe_sysctl_interrupt_rate_handler, "IU",
"Interrupt Rate");
+ SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
+ CTLFLAG_RD, &(adapter->queues[i].irqs),
+ "irqs on this queue");
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
ixgbe_sysctl_tdh_handler, "IU",
Modified: head/sys/dev/netmap/ixgbe_netmap.h
==============================================================================
--- head/sys/dev/netmap/ixgbe_netmap.h Thu Jan 26 09:45:14 2012 (r230571)
+++ head/sys/dev/netmap/ixgbe_netmap.h Thu Jan 26 09:55:16 2012 (r230572)
@@ -191,6 +191,10 @@ fail:
* (this is also true for every use of ring in the kernel).
*
* ring->avail is never used, only checked for bogus values.
+ *
+ * do_lock is set iff the function is called from the ioctl handler.
+ * In this case, grab a lock around the body, and also reclaim transmitted
+ * buffers irrespective of interrupt mitigation.
*/
static int
ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -292,10 +296,11 @@ ring_reset:
* need this.
*/
curr->read.buffer_addr = htole64(paddr);
- curr->read.olinfo_status = 0;
+ curr->read.olinfo_status = htole32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
curr->read.cmd_type_len =
htole32(txr->txd_cmd | len |
(IXGBE_ADVTXD_DTYP_DATA |
+ IXGBE_ADVTXD_DCMD_DEXT |
IXGBE_ADVTXD_DCMD_IFCS |
IXGBE_TXD_CMD_EOP | flags) );
/* If the buffer has changed, unload and reload map
@@ -328,15 +333,41 @@ ring_reset:
}
/*
- * If no packets are sent, or there is no room in the tx ring,
- * Check whether there are completed transmissions.
- * Because this is expensive (we need a register etc.)
- * we only do it if absolutely necessary, i.e. there is no room
- * in the tx ring, or where were no completed transmissions
- * (meaning that probably the caller really wanted to check
- * for completed transmissions).
+ * Reclaim buffers for completed transmissions.
+ * Because this is expensive (we read a NIC register etc.)
+ * we only do it in specific cases (see below).
+ * In all cases kring->nr_kflags indicates which slot will be
+ * checked upon a tx interrupt (nkr_num_slots means none).
*/
- if (n == 0 || kring->nr_hwavail < 1) {
+ if (do_lock) {
+ j = 1; /* forced reclaim, ignore interrupts */
+ kring->nr_kflags = kring->nkr_num_slots;
+ } else if (kring->nr_hwavail > 0) {
+ j = 0; /* buffers still available: no reclaim, ignore intr. */
+ kring->nr_kflags = kring->nkr_num_slots;
+ } else {
+ /*
+ * no buffers available, locate a slot for which we request
+ * ReportStatus (approximately half ring after next_to_clean)
+ * and record it in kring->nr_kflags.
+ * If the slot has DD set, do the reclaim looking at TDH,
+ * otherwise we go to sleep (in netmap_poll()) and will be
+ * woken up when slot nr_kflags will be ready.
+ */
+ struct ixgbe_legacy_tx_desc *txd = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
+
+ j = txr->next_to_clean + kring->nkr_num_slots/2;
+ if (j >= kring->nkr_num_slots)
+ j -= kring->nkr_num_slots;
+ // round to the closest with dd set
+ j= (j < kring->nkr_num_slots / 4 || j >= kring->nkr_num_slots*3/4) ?
+ 0 : report_frequency;
+ kring->nr_kflags = j; /* the slot to check */
+ j = txd[j].upper.fields.status & IXGBE_TXD_STAT_DD;
+ }
+ if (!j) {
+ netmap_skip_txsync++;
+ } else {
int delta;
/*
@@ -391,6 +422,8 @@ ring_reset:
* We must subtract the newly consumed slots (cur - nr_hwcur)
* from nr_hwavail, make the descriptors available for the next reads,
* and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
+ *
+ * do_lock has a special meaning: please refer to txsync.
*/
static int
ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -401,6 +434,7 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
int j, k, l, n, lim = kring->nkr_num_slots - 1;
+ int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
k = ring->cur; /* cache and check value, same as in txsync */
n = k - kring->nr_hwcur;
@@ -437,6 +471,7 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
if (j > lim)
j -= lim + 1;
+ if (force_update) {
for (n = 0; ; n++) {
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
@@ -453,6 +488,8 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
rxr->next_to_check = l;
kring->nr_hwavail += n;
}
+ kring->nr_kflags &= ~NKR_PENDINTR;
+ }
/*
* Skip past packets that userspace has already processed
Modified: head/sys/dev/netmap/netmap.c
==============================================================================
--- head/sys/dev/netmap/netmap.c Thu Jan 26 09:45:14 2012 (r230571)
+++ head/sys/dev/netmap/netmap.c Thu Jan 26 09:55:16 2012 (r230572)
@@ -146,6 +146,12 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, total_
CTLFLAG_RD, &nm_buf_pool.total_buffers, 0, "total_buffers");
SYSCTL_INT(_dev_netmap, OID_AUTO, free_buffers,
CTLFLAG_RD, &nm_buf_pool.free, 0, "free_buffers");
+int netmap_mitigate = 1;
+SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
+int netmap_skip_txsync;
+SYSCTL_INT(_dev_netmap, OID_AUTO, skip_txsync, CTLFLAG_RW, &netmap_skip_txsync, 0, "");
+int netmap_skip_rxsync;
+SYSCTL_INT(_dev_netmap, OID_AUTO, skip_rxsync, CTLFLAG_RW, &netmap_skip_rxsync, 0, "");
/*
* Allocate n buffers from the ring, and fill the slot.
Modified: head/sys/dev/netmap/netmap_kern.h
==============================================================================
--- head/sys/dev/netmap/netmap_kern.h Thu Jan 26 09:45:14 2012 (r230571)
+++ head/sys/dev/netmap/netmap_kern.h Thu Jan 26 09:55:16 2012 (r230572)
@@ -65,13 +65,14 @@ struct netmap_kring {
struct netmap_ring *ring;
u_int nr_hwcur;
int nr_hwavail;
- u_int nr_kflags;
+ u_int nr_kflags; /* private driver flags */
+#define NKR_PENDINTR 0x1 // Pending interrupt.
u_int nkr_num_slots;
int nkr_hwofs; /* offset between NIC and netmap ring */
struct netmap_adapter *na; // debugging
struct selinfo si; /* poll/select wait queue */
-};
+} __attribute__((__aligned__(64)));
/*
* This struct is part of and extends the 'struct adapter' (or
@@ -171,6 +172,8 @@ struct netmap_slot *netmap_reset(struct
enum txrx tx, int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
+extern int netmap_mitigate;
+extern int netmap_skip_txsync, netmap_skip_rxsync;
extern u_int netmap_total_buffers;
extern char *netmap_buffer_base;
extern int netmap_verbose; // XXX debugging
More information about the svn-src-all
mailing list