FreeBSD 7-STABLE mbuf corruption
Sean Bruno
seanbru at yahoo-inc.com
Thu Sep 8 00:19:12 UTC 2011
On Wed, 2011-09-07 at 16:19 -0700, Arnaud Lacombe wrote:
> Hi,
>
> On Mon, Sep 5, 2011 at 2:59 AM, Arnaud Lacombe <lacombar at gmail.com> wrote:
> > Hi folks,
> >
> > We have been trying to track down a bad mbuf management for about two
> > weeks on a customized 7.1 base. I have finally been able to reproduce
> > it with a stock FreeBSD 7-STABLE (kernel from r225276, userland from
> > 7.4).
> >
> > With the help of the attached patches, I have just been able to
> > trigger the following panic:
> >
> > panic: Corrupted unused flags, expected 0xffffffff00000000, got 0x0, flags 0x3
> > cpuid = 1
> > Uptime: 3d10h5m3s
> > Cannot dump. No dump device defined
> >
> General form of the crash is:
>
> panic: Corrupted unused flags, expected 0xffffffff00000000, got
> 0xbabe0000000000, flags 0xbabe0000babe00
> cpuid = 0
> KDB: stack backtrace:
> db_trace_self_wrapper(c0874e29,0,c0835757,f4574c48,0,...) at
> db_trace_self_wrapper+0x26
> panic(c0835757,0,ffffffff,0,babe00,...) at panic+0x10b
> igb_txeof(c6a25008,0,c0837083,5ea,17c,...) at igb_txeof+0x399
> igb_msix_que(c6a2b800,0,c084d367,4b6,c69dd068,...) at igb_msix_que+0x7b
> ithread_loop(c6a29090,f4574d38,c084d0db,31c,c6a16828,...) at ithread_loop+0xc3
> fork_exit(c061d520,c6a29090,f4574d38) at fork_exit+0xa6
> fork_trampoline() at fork_trampoline+0x8
> --- trap 0, eip = 0, esp = 0xf4574d70, ebp = 0 ---
> Uptime: 1m42s
>
> It happens particularly easily when the box receives wall of SYN
> (about 1000 cnx attempts at once) every 5s or so.
>
> - Arnaud
>
> >
I swear that we squashed this last year on "Yahoo BSD" last year with
Jack's help.
I know that we set:
hw.igb.num_queues="1"
hw.igb.rxd="4096"
hw.igb.txd="4096"
In addition to some txeof() handling patches ... Jack can see if this is
still relevant in the freebsd7 universe. Note that Yahoo is "special"
and we pluck and chuck code/drivers/whatever at will so I don't know how
this code will apply to stable-7 or if it has been applied already.
Note that this code "rage-quits" the EIAM/EIAC auto ack strategy and
tries to handle the OACTIVE state handling better. I hope these clues
find you well.
--- //depot/yahoo/ybsd_7/src/sys/dev/e1000/if_igb.c 2010-11-29
20:47:23.000000000 0000
+++ /home/seanbru/ybsd_7/src/sys/dev/e1000/if_igb.c 2010-11-29
20:47:23.000000000 0000
@@ -30,7 +30,7 @@
POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
-/*$FreeBSD: src/sys/dev/e1000/if_igb.c,v 1.3.2.13 2010/11/27 01:09:54
jfv Exp $*/
+/*$FreeBSD: stable/7/sys/dev/e1000/if_igb.c 216465 2010-12-15 22:48:44Z
jfv $*/
#ifdef HAVE_KERNEL_OPTION_HEADERS
@@ -318,10 +318,6 @@
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
-/*
-** This will autoconfigure based on
-** the number of CPUs if left at 0.
-*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
@@ -784,10 +780,14 @@
return;
/* Call cleanup if number of TX descriptors low */
+#if 0
if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
igb_txeof(txr);
+#endif
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
+ igb_txeof(txr);
if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
break;
@@ -1162,10 +1162,10 @@
IGB_TX_LOCK(txr);
if (igb_txeof(txr))
more = TRUE;
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- igb_start_locked(txr, ifp);
+ /*if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) Pointless as
igb_start_locked() checks this right off the bat*/
+ igb_start_locked(txr, ifp);
IGB_TX_UNLOCK(txr);
- if (more) {
+ if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
taskqueue_enqueue(que->tq, &que->que_task);
return;
}
@@ -1298,15 +1298,20 @@
struct rx_ring *rxr = que->rxr;
u32 newitr = 0;
bool more_tx, more_rx;
+ struct ifnet *ifp = adapter->ifp;
+#if 0
E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
+ newitr = E1000_READ_REG(&adapter->hw, E1000_EICR);
+#endif
+ E1000_WRITE_REG(&adapter->hw, E1000_EICR, que->eims);
++que->irqs;
IGB_TX_LOCK(txr);
more_tx = igb_txeof(txr);
IGB_TX_UNLOCK(txr);
- more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
+ more_rx = igb_rxeof(que, -1, NULL);
if (igb_enable_aim == FALSE)
goto no_calc;
@@ -1361,7 +1366,7 @@
no_calc:
/* Schedule a clean task if needed*/
- if (more_tx || more_rx)
+ if (more_tx || more_rx || (ifp->if_drv_flags & IFF_DRV_OACTIVE))
taskqueue_enqueue(que->tq, &que->que_task);
else
/* Reenable this interrupt */
@@ -1382,6 +1387,7 @@
struct adapter *adapter = arg;
u32 icr;
+ E1000_WRITE_REG(&adapter->hw, E1000_EICR, adapter->link_mask);
++adapter->link_irq;
icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
if (!(icr & E1000_ICR_LSC))
@@ -1535,6 +1541,14 @@
if (m_head->m_flags & M_VLANTAG)
cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+/*
+ * We just did this in before invocation, seems completely
+ * redundant, igb_handle_queue -> igb_txeof
+ * Pretty sure this is impossible as we check for the
+ * IGB_TX_CLEANUP_THRESHOLD in igb_start_locked() which happens
+ * before this func in invoked
+ */
+#if 0
/*
* Force a cleanup if number of TX descriptors
* available hits the threshold
@@ -1547,6 +1561,7 @@
return (ENOBUFS);
}
}
+#endif
/*
* Map the packet for DMA.
@@ -2082,6 +2097,7 @@
que->eims = E1000_EICR_TX_QUEUE0 << i;
else
que->eims = 1 << vector;
+ device_printf(adapter->dev, "que %d eims= 0x%x\n", i,
que->eims);
/*
** Bind the msix vector, and thus the
** rings to the corresponding cpu.
@@ -3264,8 +3280,6 @@
case ETHERTYPE_IPV6:
ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
ip_hlen = sizeof(struct ip6_hdr);
- if (mp->m_len < ehdrlen + ip_hlen)
- return (FALSE);
ipproto = ip6->ip6_nxt;
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
break;
@@ -4461,6 +4475,24 @@
static void
igb_enable_intr(struct adapter *adapter)
{
+ uint32_t ims, regval;
+
+ if (adapter->msix_mem) {
+ ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
+ regval = E1000_READ_REG(&adapter->hw, E1000_EIAC);
+#if 0
+ E1000_WRITE_REG(&adapter->hw, E1000_EIAC, regval |
adapter->eims_mask);
+ regval = E1000_READ_REG(&adapter->hw, E1000_EIAM);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIAM, regval |
adapter->eims_mask);
+#endif
+ E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
adapter->eims_mask);
+ E1000_WRITE_REG(&adapter->hw, E1000_IMS, ims);
+ } else {
+ E1000_WRITE_REG(&adapter->hw, E1000_IMS,
IMS_ENABLE_MASK | E1000_IMS_DRSTA);
+ E1000_WRITE_REG(&adapter->hw, E1000_IAM,
IMS_ENABLE_MASK | E1000_IMS_DRSTA);
+ }
+
+#if 0
/* With RSS set up what to auto clear */
if (adapter->msix_mem) {
E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
@@ -4475,6 +4507,7 @@
E1000_WRITE_REG(&adapter->hw, E1000_IMS,
IMS_ENABLE_MASK);
}
+#endif
E1000_WRITE_FLUSH(&adapter->hw);
return;
@@ -4483,11 +4516,33 @@
static void
igb_disable_intr(struct adapter *adapter)
{
+ uint32_t regval;
+
+ /*
+ * we need to be careful when disabling interrupts. The VFs
are also
+ * mapped into these registers and so clearing the bits can
cause
+ * issues on the VF drivers so we only need to clear what we
set
+ */
+ if (adapter->msix_mem) {
+ regval = E1000_READ_REG(&adapter->hw, E1000_EIAM);
+#if 0
+ E1000_WRITE_REG(&adapter->hw, E1000_EIAM, regval &
~adapter->eims_mask);
+ regval = E1000_READ_REG(&adapter->hw, E1000_EIAC);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIAC, regval &
~adapter->eims_mask);
+#endif
+ E1000_WRITE_REG(&adapter->hw, E1000_EIMC,
adapter->eims_mask);
+ }
+
+ E1000_WRITE_REG(&adapter->hw, E1000_IAM, 0);
+ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
+
+#if 0
if (adapter->msix_mem) {
E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
}
E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
+#endif
E1000_WRITE_FLUSH(&adapter->hw);
return;
}
@@ -4876,8 +4931,8 @@
struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
struct e1000_hw_stats *stats = adapter->stats;
- struct sysctl_oid *stat_node, *queue_node, *int_node,
*host_node;
- struct sysctl_oid_list *stat_list, *queue_list, *int_list,
*host_list;
+ struct sysctl_oid *stat_node, *queue_node, *int_node,
*host_node, *debug_node;
+ struct sysctl_oid_list *stat_list, *queue_list, *int_list,
*host_list, *debug_list;
#define QUEUE_NAME_LEN 32
char namebuf[QUEUE_NAME_LEN];
@@ -5245,6 +5300,27 @@
SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
CTLFLAG_RD, &stats->hrmpc,
"Header Redirection Missed Packet Count");
+
+
+ debug_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "debug",
+ CTLFLAG_RD, NULL,
+ "Debug info");
+
+ debug_list = SYSCTL_CHILDREN(debug_node);
+
+ txr = adapter->tx_rings;
+ SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "desc_avail",
+ CTLFLAG_RD, (u_int *)(uintptr_t)&txr->tx_avail,
0,
+ "");
+
+ SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "next_to_clean",
+ CTLFLAG_RD, &txr->next_to_clean, 0,
+ "");
+
+ SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "if_flags",
+ CTLFLAG_RD, &adapter->ifp->if_drv_flags, 0,
+ "");
+
}
More information about the freebsd-net
mailing list