svn commit: r197607 - in stable/8/sys: . amd64/include/xen cddl/contrib/opensolaris contrib/dev/acpica contrib/pf dev/mxge dev/xen/xenpci
Andrew Gallatin
gallatin at FreeBSD.org
Mon Sep 28 23:48:16 UTC 2009
Author: gallatin
Date: Mon Sep 28 23:48:16 2009
New Revision: 197607
URL: http://svn.freebsd.org/changeset/base/197607
Log:
MFC 197395: Improve mxge watchdog routine's ability to reliably reset a failed NIC
Approved by: re (kib)
Modified:
stable/8/sys/ (props changed)
stable/8/sys/amd64/include/xen/ (props changed)
stable/8/sys/cddl/contrib/opensolaris/ (props changed)
stable/8/sys/contrib/dev/acpica/ (props changed)
stable/8/sys/contrib/pf/ (props changed)
stable/8/sys/dev/mxge/if_mxge.c
stable/8/sys/dev/xen/xenpci/ (props changed)
Modified: stable/8/sys/dev/mxge/if_mxge.c
==============================================================================
--- stable/8/sys/dev/mxge/if_mxge.c Mon Sep 28 22:41:28 2009 (r197606)
+++ stable/8/sys/dev/mxge/if_mxge.c Mon Sep 28 23:48:16 2009 (r197607)
@@ -143,7 +143,7 @@ MODULE_DEPEND(mxge, zlib, 1, 1, 1);
static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
-static int mxge_close(mxge_softc_t *sc);
+static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
@@ -1305,8 +1305,7 @@ mxge_reset(mxge_softc_t *sc, int interru
ss->lro_queued = 0;
ss->lro_flushed = 0;
if (ss->fw_stats != NULL) {
- ss->fw_stats->valid = 0;
- ss->fw_stats->send_done_count = 0;
+ bzero(ss->fw_stats, sizeof *ss->fw_stats);
}
}
sc->rdma_tags_available = 15;
@@ -1379,7 +1378,7 @@ mxge_change_lro_locked(mxge_softc_t *sc,
ifp->if_capenable |= IFCAP_LRO;
sc->lro_cnt = lro_cnt;
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- mxge_close(sc);
+ mxge_close(sc, 0);
err = mxge_open(sc);
}
return err;
@@ -1495,6 +1494,10 @@ mxge_add_sysctls(mxge_softc_t *sc)
"read_write_dma_MBs",
CTLFLAG_RD, &sc->read_write_dma,
0, "DMA concurrent Read/Write speed in MB/s");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "watchdog_resets",
+ CTLFLAG_RD, &sc->watchdog_resets,
+ 0, "Number of times NIC was reset");
/* performance related tunables */
@@ -3600,7 +3603,7 @@ abort:
}
static int
-mxge_close(mxge_softc_t *sc)
+mxge_close(mxge_softc_t *sc, int down)
{
mxge_cmd_t cmd;
int err, old_down_cnt;
@@ -3617,21 +3620,23 @@ mxge_close(mxge_softc_t *sc)
}
#endif
sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- old_down_cnt = sc->down_cnt;
- wmb();
- err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
- if (err) {
- device_printf(sc->dev, "Couldn't bring down link\n");
- }
- if (old_down_cnt == sc->down_cnt) {
- /* wait for down irq */
- DELAY(10 * sc->intr_coal_delay);
- }
- wmb();
- if (old_down_cnt == sc->down_cnt) {
- device_printf(sc->dev, "never got down irq\n");
+ if (!down) {
+ old_down_cnt = sc->down_cnt;
+ wmb();
+ err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
+ if (err) {
+ device_printf(sc->dev,
+ "Couldn't bring down link\n");
+ }
+ if (old_down_cnt == sc->down_cnt) {
+ /* wait for down irq */
+ DELAY(10 * sc->intr_coal_delay);
+ }
+ wmb();
+ if (old_down_cnt == sc->down_cnt) {
+ device_printf(sc->dev, "never got down irq\n");
+ }
}
-
mxge_free_mbufs(sc);
return 0;
@@ -3684,8 +3689,9 @@ static int
mxge_watchdog_reset(mxge_softc_t *sc, int slice)
{
struct pci_devinfo *dinfo;
+ struct mxge_slice_state *ss;
mxge_tx_ring_t *tx;
- int err;
+ int err, running, s, num_tx_slices = 1;
uint32_t reboot;
uint16_t cmd;
@@ -3719,6 +3725,30 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
reboot = mxge_read_reboot(sc);
device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
reboot);
+ running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
+ if (running) {
+
+ /*
+ * quiesce NIC so that TX routines will not try to
+ * xmit after restoration of BAR
+ */
+
+ /* Mark the link as down */
+ if (sc->link_state) {
+ sc->link_state = 0;
+ if_link_state_change(sc->ifp,
+ LINK_STATE_DOWN);
+ }
+#ifdef IFNET_BUF_RING
+ num_tx_slices = sc->num_slices;
+#endif
+ /* grab all TX locks to ensure no tx */
+ for (s = 0; s < num_tx_slices; s++) {
+ ss = &sc->ss[s];
+ mtx_lock(&ss->tx.mtx);
+ }
+ mxge_close(sc, 1);
+ }
/* restore PCI configuration space */
dinfo = device_get_ivars(sc->dev);
pci_cfg_restore(sc->dev, dinfo);
@@ -3726,10 +3756,22 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
/* and redo any changes we made to our config space */
mxge_setup_cfg_space(sc);
- if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
- mxge_close(sc);
- err = mxge_open(sc);
+ /* reload f/w */
+ err = mxge_load_firmware(sc, 0);
+ if (err) {
+ device_printf(sc->dev,
+ "Unable to re-load f/w\n");
}
+ if (running) {
+ if (!err)
+ err = mxge_open(sc);
+ /* release all TX locks */
+ for (s = 0; s < num_tx_slices; s++) {
+ ss = &sc->ss[s];
+ mtx_unlock(&ss->tx.mtx);
+ }
+ }
+ sc->watchdog_resets++;
} else {
tx = &sc->ss[slice].tx;
device_printf(sc->dev,
@@ -3745,6 +3787,9 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
be32toh(sc->ss->fw_stats->send_done_count));
device_printf(sc->dev, "not resetting\n");
}
+ if (err)
+ device_printf(sc->dev, "watchdog reset failed\n");
+
return (err);
}
@@ -3860,11 +3905,11 @@ mxge_change_mtu(mxge_softc_t *sc, int mt
old_mtu = ifp->if_mtu;
ifp->if_mtu = mtu;
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- mxge_close(sc);
+ mxge_close(sc, 0);
err = mxge_open(sc);
if (err != 0) {
ifp->if_mtu = old_mtu;
- mxge_close(sc);
+ mxge_close(sc, 0);
(void) mxge_open(sc);
}
}
@@ -3922,7 +3967,7 @@ mxge_ioctl(struct ifnet *ifp, u_long com
}
} else {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- mxge_close(sc);
+ mxge_close(sc, 0);
}
}
mtx_unlock(&sc->driver_mtx);
@@ -4645,7 +4690,7 @@ mxge_detach(device_t dev)
mtx_lock(&sc->driver_mtx);
sc->dying = 1;
if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
- mxge_close(sc);
+ mxge_close(sc, 0);
mtx_unlock(&sc->driver_mtx);
ether_ifdetach(sc->ifp);
callout_drain(&sc->co_hdl);
More information about the svn-src-stable-8 mailing list