kern/176446: [netinet] [patch] Concurrency in ixgbe driving out-of-order packet process and spurious RST
John Baldwin
jhb at freebsd.org
Fri Apr 19 17:50:02 UTC 2013
The following reply was made to PR kern/176446; it has been noted by GNATS.
From: John Baldwin <jhb at freebsd.org>
To: freebsd-net at freebsd.org
Cc: Jack Vogel <jfvogel at gmail.com>,
bug-followup at freebsd.org,
Mike Karels <mike at karels.net>
Subject: Re: kern/176446: [netinet] [patch] Concurrency in ixgbe driving out-of-order packet process and spurious RST
Date: Fri, 19 Apr 2013 12:27:09 -0400
A second patch. This is not something I mentioned before, but I had this in
my checkout. In the legacy IRQ case this could also result in out-of-order
processing. It also fixes a potential OACTIVE-stuck type bug that we used to
have in igb. I have no way to test this, so it would be good if some other
folks could test this.
The patch changes ixgbe_txeof() return void and changes the few places that
checked its return value to ignore it. While it is true that ixgbe has a tx
processing limit (which I think is dubious.. TX completion processing is very
cheap unlike RX processing, so it seems to me like it should always run to
completion as in igb), in the common case I think the result will be to do
what igb used to do: poll the ring at 100% CPU (either in the interrupt
handler or in the task it keeps rescheduling) waiting for pending TX packets
to be completed (which is pointless: the host CPU can't make the NIC transmit
packets any faster by polling).
It also changes the interrupt handlers to restart packet transmission
synchronously rather than always deferring that to a task (the former is what
(nearly) all other drivers do). It also fixes the interrupt handlers to be
consistent (one looped on txeof but not the others). In the case of the
legacy interrupt handler it is possible it could fail to restart packet
transmission if there were no pending RX packets after rxeof returned and
txeof fully cleaned its ring without this change.
It also fixes the legacy interrupt handler to not re-enable the interrupt if
it schedules the task but to wait until the task completes (this could result
in concurrent, out-of-order RX processing).
Index: /home/jhb/work/freebsd/svn/head/sys/dev/ixgbe/ixgbe.c
===================================================================
--- /home/jhb/work/freebsd/svn/head/sys/dev/ixgbe/ixgbe.c (revision 249553)
+++ /home/jhb/work/freebsd/svn/head/sys/dev/ixgbe/ixgbe.c (working copy)
@@ -149,7 +149,7 @@
static void ixgbe_enable_intr(struct adapter *);
static void ixgbe_disable_intr(struct adapter *);
static void ixgbe_update_stats_counters(struct adapter *);
-static bool ixgbe_txeof(struct tx_ring *);
+static void ixgbe_txeof(struct tx_ring *);
static bool ixgbe_rxeof(struct ix_queue *);
static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void ixgbe_set_promisc(struct adapter *);
@@ -1431,7 +1414,10 @@
}
/* Reenable this interrupt */
- ixgbe_enable_queue(adapter, que->msix);
+ if (que->res != NULL)
+ ixgbe_enable_queue(adapter, que->msix);
+ else
+ ixgbe_enable_intr(adapter);
return;
}
@@ -1449,8 +1435,9 @@
struct adapter *adapter = que->adapter;
struct ixgbe_hw *hw = &adapter->hw;
struct tx_ring *txr = adapter->tx_rings;
- bool more_tx, more_rx;
- u32 reg_eicr, loop = MAX_LOOP;
+ struct ifnet *ifp = adapter->ifp;
+ bool more;
+ u32 reg_eicr;
reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
@@ -1461,17 +1448,19 @@
return;
}
- more_rx = ixgbe_rxeof(que);
+ more = ixgbe_rxeof(que);
IXGBE_TX_LOCK(txr);
- do {
- more_tx = ixgbe_txeof(txr);
- } while (loop-- && more_tx);
+ ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+ if (!drbr_empty(ifp, txr->br))
+ ixgbe_mq_start_locked(ifp, txr, NULL);
+#else
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ ixgbe_start_locked(txr, ifp);
+#endif
IXGBE_TX_UNLOCK(txr);
- if (more_rx || more_tx)
- taskqueue_enqueue(que->tq, &que->que_task);
-
/* Check for fan failure */
if ((hw->phy.media_type == ixgbe_media_type_copper) &&
(reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1484,7 +1473,10 @@
if (reg_eicr & IXGBE_EICR_LSC)
taskqueue_enqueue(adapter->tq, &adapter->link_task);
- ixgbe_enable_intr(adapter);
+ if (more)
+ taskqueue_enqueue(que->tq, &que->que_task);
+ else
+ ixgbe_enable_intr(adapter);
return;
}
@@ -1501,27 +1493,24 @@
struct adapter *adapter = que->adapter;
struct tx_ring *txr = que->txr;
struct rx_ring *rxr = que->rxr;
- bool more_tx, more_rx;
+ struct ifnet *ifp = adapter->ifp;
+ bool more;
u32 newitr = 0;
ixgbe_disable_queue(adapter, que->msix);
++que->irqs;
- more_rx = ixgbe_rxeof(que);
+ more = ixgbe_rxeof(que);
IXGBE_TX_LOCK(txr);
- more_tx = ixgbe_txeof(txr);
- /*
- ** Make certain that if the stack
- ** has anything queued the task gets
- ** scheduled to handle it.
- */
+ ixgbe_txeof(txr);
#ifdef IXGBE_LEGACY_TX
if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
+ ixgbe_start_locked(txr, ifp);
#else
- if (!drbr_empty(adapter->ifp, txr->br))
+ if (!drbr_empty(ifp, txr->br))
+ ixgbe_mq_start_locked(ifp, txr, NULL);
#endif
- more_tx = 1;
IXGBE_TX_UNLOCK(txr);
/* Do AIM now? */
@@ -1575,7 +1564,7 @@
rxr->packets = 0;
no_calc:
- if (more_tx || more_rx)
+ if (more)
taskqueue_enqueue(que->tq, &que->que_task);
else /* Reenable this interrupt */
ixgbe_enable_queue(adapter, que->msix);
@@ -3557,7 +3545,7 @@
* tx_buffer is put back on the free queue.
*
**********************************************************************/
-static bool
+static void
ixgbe_txeof(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
@@ -3605,13 +3593,13 @@
IXGBE_CORE_UNLOCK(adapter);
IXGBE_TX_LOCK(txr);
}
- return FALSE;
+ return;
}
#endif /* DEV_NETMAP */
if (txr->tx_avail == txr->num_desc) {
txr->queue_status = IXGBE_QUEUE_IDLE;
- return FALSE;
+ return;
}
/* Get work starting point */
@@ -3705,12 +3693,8 @@
if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
txr->queue_status = IXGBE_QUEUE_HUNG;
- if (txr->tx_avail == txr->num_desc) {
+ if (txr->tx_avail == txr->num_desc)
txr->queue_status = IXGBE_QUEUE_IDLE;
- return (FALSE);
- }
-
- return TRUE;
}
/*********************************************************************
--
John Baldwin
More information about the freebsd-net
mailing list