ixgbe & if_igb RX ring locking
John Baldwin
jhb at freebsd.org
Mon Oct 15 13:04:30 UTC 2012
On Monday, October 15, 2012 10:10:40 am Alexander V. Chernikov wrote:
> On 13.10.2012 23:24, Jack Vogel wrote:
> > On Sat, Oct 13, 2012 at 11:22 AM, Luigi Rizzo<rizzo at iet.unipi.it> wrote:
>
> >>
> >> one option could be (same as it is done in the timer
> >> routine in dummynet) to build a list of all the packets
> >> that need to be sent to if_input(), and then call
> >> if_input with the entire list outside the lock.
> >>
> >> It would be even easier if we modify the various *_input()
> >> routines to handle a list of mbufs instead of just one.
>
> Bulk processing is generally a good idea we probably should implement.
> Probably starting from driver queue ending with marked mbufs
> (OURS/forward/legacy processing (appletalk and similar))?
>
> This can minimize an impact for all
> locks on RX side:
> L2
> * rx PFIL hook
> L3 (both IPv4 and IPv6)
> * global IF_ADDR_RLOCK (currently commented out)
> * Per-interface ADDR_RLOCK
> * PFIL hook
>
> From the first glance, there can be problems with:
> * Increased latency (we should have some kind of rx_process_limit), but
> still
> * reader locks being acquired for much longer amount of time
>
> >>
> >> cheers
> >> luigi
> >>
> >> Very interesting idea Luigi, will have to get that some thought.
> >
> > Jack
>
> Returning to original post topic:
>
> Given
> 1) we are currently binding ixgbe ithreads to CPU cores
> 2) RX queue lock is used by (indirectly) in only 2 places:
> a) ISR routine (msix or legacy irq)
> b) taskqueue routine which is scheduled if some packets remains in RX
> queue and rx_process_limit ended OR we need something to TX
>
> 3) in practice taskqueue routine is a nightmare for many people since
> there is no way to stop "kernel {ix0 que}" thread eating 100% cpu after
> some traffic burst happens: once it is called it starts to schedule
> itself more and more replacing original ISR routine. Additionally,
> increasing rx_process_limit does not help since taskqueue is called with
> the same limit. Finally, currently netisr taskq threads are not bound to
> any CPU which makes the process even more uncontrollable.
I think part of the problem here is that the taskqueue in ixgbe(4) is
bogusly rescheduled for TX handling. Instead, ixgbe_msix_que() should
just start transmitting packets directly.
I fixed this in igb(4) here:
http://svnweb.freebsd.org/base?view=revision&revision=233708
You can try this for ixgbe(4). It also comments out a spurious taskqueue
reschedule from the watchdog handler that might also lower the taskqueue
usage. You can try changing that #if 0 to an #if 1 to test just the txeof
changes:
Index: ixgbe.c
===================================================================
--- ixgbe.c (revision 241579)
+++ ixgbe.c (working copy)
@@ -149,7 +149,7 @@
static void ixgbe_enable_intr(struct adapter *);
static void ixgbe_disable_intr(struct adapter *);
static void ixgbe_update_stats_counters(struct adapter *);
-static bool ixgbe_txeof(struct tx_ring *);
+static void ixgbe_txeof(struct tx_ring *);
static bool ixgbe_rxeof(struct ix_queue *, int);
static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void ixgbe_set_promisc(struct adapter *);
@@ -1439,8 +1439,9 @@
struct adapter *adapter = que->adapter;
struct ixgbe_hw *hw = &adapter->hw;
struct tx_ring *txr = adapter->tx_rings;
- bool more_tx, more_rx;
- u32 reg_eicr, loop = MAX_LOOP;
+ struct ifnet *ifp = adapter->ifp;
+ bool more_rx;
+ u32 reg_eicr;
reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
@@ -1454,14 +1455,16 @@
more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
IXGBE_TX_LOCK(txr);
- do {
- more_tx = ixgbe_txeof(txr);
- } while (loop-- && more_tx);
+ ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+ if (!drbr_empty(ifp, txr->br))
+ ixgbe_mq_start_locked(ifp, txr, NULL);
+#else
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ ixgbe_start_locked(txr, ifp);
+#endif
IXGBE_TX_UNLOCK(txr);
- if (more_rx || more_tx)
- taskqueue_enqueue(que->tq, &que->que_task);
-
/* Check for fan failure */
if ((hw->phy.media_type == ixgbe_media_type_copper) &&
(reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1474,7 +1477,10 @@
if (reg_eicr & IXGBE_EICR_LSC)
taskqueue_enqueue(adapter->tq, &adapter->link_task);
- ixgbe_enable_intr(adapter);
+ if (more_rx)
+ taskqueue_enqueue(que->tq, &que->que_task);
+ else
+ ixgbe_enable_intr(adapter);
return;
}
@@ -1491,7 +1497,8 @@
struct adapter *adapter = que->adapter;
struct tx_ring *txr = que->txr;
struct rx_ring *rxr = que->rxr;
- bool more_tx, more_rx;
+ struct ifnet *ifp = adapter->ifp;
+ bool more_rx;
u32 newitr = 0;
ixgbe_disable_queue(adapter, que->msix);
@@ -1500,18 +1507,14 @@
more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
IXGBE_TX_LOCK(txr);
- more_tx = ixgbe_txeof(txr);
- /*
- ** Make certain that if the stack
- ** has anything queued the task gets
- ** scheduled to handle it.
- */
-#if __FreeBSD_version < 800000
- if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
+ ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+ if (!drbr_empty(ifp, txr->br))
+ ixgbe_mq_start_locked(ifp, txr, NULL);
#else
- if (!drbr_empty(adapter->ifp, txr->br))
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ ixgbe_start_locked(txr, ifp);
#endif
- more_tx = 1;
IXGBE_TX_UNLOCK(txr);
/* Do AIM now? */
@@ -1565,7 +1568,7 @@
rxr->packets = 0;
no_calc:
- if (more_tx || more_rx)
+ if (more_rx)
taskqueue_enqueue(que->tq, &que->que_task);
else /* Reenable this interrupt */
ixgbe_enable_queue(adapter, que->msix);
@@ -2049,8 +2052,10 @@
++hung;
if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
++busy;
+#if 0
if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
taskqueue_enqueue(que->tq, &que->que_task);
+#endif
}
/* Only truely watchdog if all queues show hung */
if (hung == adapter->num_queues)
@@ -3548,7 +3556,7 @@
* tx_buffer is put back on the free queue.
*
**********************************************************************/
-static bool
+static void
ixgbe_txeof(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
@@ -3597,13 +3605,13 @@
IXGBE_CORE_UNLOCK(adapter);
IXGBE_TX_LOCK(txr);
}
- return FALSE;
+ return;
}
#endif /* DEV_NETMAP */
if (txr->tx_avail == adapter->num_tx_desc) {
txr->queue_status = IXGBE_QUEUE_IDLE;
- return FALSE;
+ return;
}
processed = 0;
@@ -3613,7 +3621,7 @@
tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
last = tx_buffer->eop_index;
if (last == -1)
- return FALSE;
+ return;
eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
/*
@@ -3693,12 +3701,8 @@
if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
- if (txr->tx_avail == adapter->num_tx_desc) {
+ if (txr->tx_avail == adapter->num_tx_desc)
txr->queue_status = IXGBE_QUEUE_IDLE;
- return (FALSE);
- }
-
- return TRUE;
}
/*********************************************************************
--
John Baldwin
More information about the freebsd-net
mailing list