svn commit: r330557 - stable/9/sys/dev/qlxgbe
David C Somayajulu
davidcs at FreeBSD.org
Tue Mar 6 23:28:14 UTC 2018
Author: davidcs
Date: Tue Mar 6 23:28:12 2018
New Revision: 330557
URL: https://svnweb.freebsd.org/changeset/base/330557
Log:
MFC r329855
1. Added support to offline a port if error recovery is unsuccessful.
2. Sysctls to enable/disable driver_state_dump and error_recovery.
3. Sysctl to control the delay between hw/fw reinitialization and
restarting the fastpath.
4. Stop periodic stats retrieval if interface has IFF_DRV_RUNNING flag off.
5. Print contents of PEG_HALT_STATUS1 and PEG_HALT_STATUS2 on heartbeat
failure.
6. Speed up slowpath shutdown during error recovery.
7. link_state update using atomic_store.
8. Added timestamp information on driver state and minidump captures.
9. Added support for Slowpath event logging.
10. Added additional failure injection types to simulate failures.
Modified:
stable/9/sys/dev/qlxgbe/ql_dbg.h
stable/9/sys/dev/qlxgbe/ql_def.h
stable/9/sys/dev/qlxgbe/ql_glbl.h
stable/9/sys/dev/qlxgbe/ql_hw.c
stable/9/sys/dev/qlxgbe/ql_hw.h
stable/9/sys/dev/qlxgbe/ql_inline.h
stable/9/sys/dev/qlxgbe/ql_ioctl.c
stable/9/sys/dev/qlxgbe/ql_ioctl.h
stable/9/sys/dev/qlxgbe/ql_isr.c
stable/9/sys/dev/qlxgbe/ql_misc.c
stable/9/sys/dev/qlxgbe/ql_os.c
stable/9/sys/dev/qlxgbe/ql_os.h
stable/9/sys/dev/qlxgbe/ql_ver.h
Directory Properties:
stable/9/ (props changed)
stable/9/sys/ (props changed)
Modified: stable/9/sys/dev/qlxgbe/ql_dbg.h
==============================================================================
--- stable/9/sys/dev/qlxgbe/ql_dbg.h Tue Mar 6 23:17:56 2018 (r330556)
+++ stable/9/sys/dev/qlxgbe/ql_dbg.h Tue Mar 6 23:28:12 2018 (r330557)
@@ -42,17 +42,21 @@ extern void ql_dump_buf16(qla_host_t *ha, const char *
extern void ql_dump_buf32(qla_host_t *ha, const char *str, void *dbuf,
uint32_t len32);
-#define INJCT_RX_RXB_INVAL 0x00001
-#define INJCT_RX_MP_NULL 0x00002
-#define INJCT_LRO_RXB_INVAL 0x00003
-#define INJCT_LRO_MP_NULL 0x00004
-#define INJCT_NUM_HNDLE_INVALID 0x00005
-#define INJCT_RDWR_INDREG_FAILURE 0x00006
-#define INJCT_RDWR_OFFCHIPMEM_FAILURE 0x00007
-#define INJCT_MBX_CMD_FAILURE 0x00008
-#define INJCT_HEARTBEAT_FAILURE 0x00009
-#define INJCT_TEMPERATURE_FAILURE 0x0000A
-#define INJCT_M_GETCL_M_GETJCL_FAILURE 0x0000B
+#define INJCT_RX_RXB_INVAL 0x00001
+#define INJCT_RX_MP_NULL 0x00002
+#define INJCT_LRO_RXB_INVAL 0x00003
+#define INJCT_LRO_MP_NULL 0x00004
+#define INJCT_NUM_HNDLE_INVALID 0x00005
+#define INJCT_RDWR_INDREG_FAILURE 0x00006
+#define INJCT_RDWR_OFFCHIPMEM_FAILURE 0x00007
+#define INJCT_MBX_CMD_FAILURE 0x00008
+#define INJCT_HEARTBEAT_FAILURE 0x00009
+#define INJCT_TEMPERATURE_FAILURE 0x0000A
+#define INJCT_M_GETCL_M_GETJCL_FAILURE 0x0000B
+#define INJCT_INV_CONT_OPCODE 0x0000C
+#define INJCT_SGL_RCV_INV_DESC_COUNT 0x0000D
+#define INJCT_SGL_LRO_INV_DESC_COUNT 0x0000E
+#define INJCT_PEER_PORT_FAILURE_ERR_RECOVERY 0x0000F
#ifdef QL_DBG
Modified: stable/9/sys/dev/qlxgbe/ql_def.h
==============================================================================
--- stable/9/sys/dev/qlxgbe/ql_def.h Tue Mar 6 23:17:56 2018 (r330556)
+++ stable/9/sys/dev/qlxgbe/ql_def.h Tue Mar 6 23:28:12 2018 (r330557)
@@ -144,12 +144,12 @@ struct qla_host {
volatile uint32_t qla_watchdog_paused;
volatile uint32_t qla_initiate_recovery;
volatile uint32_t qla_detach_active;
+ volatile uint32_t offline;
device_t pci_dev;
- uint16_t watchdog_ticks;
+ volatile uint16_t watchdog_ticks;
uint8_t pci_func;
- uint8_t resvd;
/* ioctl related */
struct cdev *ioctl_dev;
@@ -182,6 +182,7 @@ struct qla_host {
/* hardware access lock */
+ struct mtx sp_log_lock;
struct mtx hw_lock;
volatile uint32_t hw_lock_held;
uint64_t hw_lock_failed;
@@ -239,6 +240,9 @@ struct qla_host {
volatile const char *qla_unlock;
uint32_t dbg_level;
uint32_t enable_minidump;
+ uint32_t enable_driverstate_dump;
+ uint32_t enable_error_recovery;
+ uint32_t ms_delay_after_init;
uint8_t fw_ver_str[32];
@@ -272,5 +276,7 @@ typedef struct qla_host qla_host_t;
#define QL_MAC_CMP(mac1, mac2) \
((((*(uint32_t *) mac1) == (*(uint32_t *) mac2) && \
(*(uint16_t *)(mac1 + 4)) == (*(uint16_t *)(mac2 + 4)))) ? 0 : 1)
+
+#define QL_INITIATE_RECOVERY(ha) qla_set_error_recovery(ha)
#endif /* #ifndef _QL_DEF_H_ */
Modified: stable/9/sys/dev/qlxgbe/ql_glbl.h
==============================================================================
--- stable/9/sys/dev/qlxgbe/ql_glbl.h Tue Mar 6 23:17:56 2018 (r330556)
+++ stable/9/sys/dev/qlxgbe/ql_glbl.h Tue Mar 6 23:28:12 2018 (r330557)
@@ -47,6 +47,7 @@ extern uint32_t ql_rcv_isr(qla_host_t *ha, uint32_t sd
extern int ql_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf);
extern void ql_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf);
extern int ql_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp);
+extern void qla_set_error_recovery(qla_host_t *ha);
/*
* from ql_hw.c
@@ -115,5 +116,11 @@ extern unsigned int ql83xx_minidump_len;
extern void ql_alloc_drvr_state_buffer(qla_host_t *ha);
extern void ql_free_drvr_state_buffer(qla_host_t *ha);
extern void ql_capture_drvr_state(qla_host_t *ha);
+extern void ql_sp_log(qla_host_t *ha, uint16_t fmtstr_idx, uint16_t num_params,
+ uint32_t param0, uint32_t param1, uint32_t param2,
+ uint32_t param3, uint32_t param4);
+extern void ql_alloc_sp_log_buffer(qla_host_t *ha);
+extern void ql_free_sp_log_buffer(qla_host_t *ha);
+
#endif /* #ifndef_QL_GLBL_H_ */
Modified: stable/9/sys/dev/qlxgbe/ql_hw.c
==============================================================================
--- stable/9/sys/dev/qlxgbe/ql_hw.c Tue Mar 6 23:17:56 2018 (r330556)
+++ stable/9/sys/dev/qlxgbe/ql_hw.c Tue Mar 6 23:28:12 2018 (r330557)
@@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$");
static void qla_del_rcv_cntxt(qla_host_t *ha);
static int qla_init_rcv_cntxt(qla_host_t *ha);
-static void qla_del_xmt_cntxt(qla_host_t *ha);
+static int qla_del_xmt_cntxt(qla_host_t *ha);
static int qla_init_xmt_cntxt(qla_host_t *ha);
static int qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t n_hmbox,
uint32_t *fw_mbox, uint32_t n_fwmbox, uint32_t no_pause);
@@ -647,11 +647,118 @@ qlnx_add_hw_xmt_stats_sysctls(qla_host_t *ha)
}
static void
+qlnx_add_hw_mbx_cmpl_stats_sysctls(qla_host_t *ha)
+{
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid_list *node_children;
+
+ ctx = device_get_sysctl_ctx(ha->pci_dev);
+ node_children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev));
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_lt_200ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[0],
+ "mbx_completion_time_lt_200ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_200ms_400ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[1],
+ "mbx_completion_time_200ms_400ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_400ms_600ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[2],
+ "mbx_completion_time_400ms_600ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_600ms_800ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[3],
+ "mbx_completion_time_600ms_800ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_800ms_1000ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[4],
+ "mbx_completion_time_800ms_1000ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_1000ms_1200ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[5],
+ "mbx_completion_time_1000ms_1200ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_1200ms_1400ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[6],
+ "mbx_completion_time_1200ms_1400ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_1400ms_1600ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[7],
+ "mbx_completion_time_1400ms_1600ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_1600ms_1800ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[8],
+ "mbx_completion_time_1600ms_1800ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_1800ms_2000ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[9],
+ "mbx_completion_time_1800ms_2000ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_2000ms_2200ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[10],
+ "mbx_completion_time_2000ms_2200ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_2200ms_2400ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[11],
+ "mbx_completion_time_2200ms_2400ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_2400ms_2600ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[12],
+ "mbx_completion_time_2400ms_2600ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_2600ms_2800ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[13],
+ "mbx_completion_time_2600ms_2800ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_2800ms_3000ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[14],
+ "mbx_completion_time_2800ms_3000ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_3000ms_4000ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[15],
+ "mbx_completion_time_3000ms_4000ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_time_4000ms_5000ms",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[16],
+ "mbx_completion_time_4000ms_5000ms");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_host_mbx_cntrl_timeout",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[17],
+ "mbx_completion_host_mbx_cntrl_timeout");
+
+ SYSCTL_ADD_QUAD(ctx, node_children,
+ OID_AUTO, "mbx_completion_fw_mbx_cntrl_timeout",
+ CTLFLAG_RD, &ha->hw.mbx_comp_msecs[18],
+ "mbx_completion_fw_mbx_cntrl_timeout");
+ return;
+}
+
+static void
qlnx_add_hw_stats_sysctls(qla_host_t *ha)
{
qlnx_add_hw_mac_stats_sysctls(ha);
qlnx_add_hw_rcv_stats_sysctls(ha);
qlnx_add_hw_xmt_stats_sysctls(ha);
+ qlnx_add_hw_mbx_cmpl_stats_sysctls(ha);
return;
}
@@ -918,6 +1025,30 @@ ql_hw_add_sysctls(qla_host_t *ha)
"\t Any change requires ifconfig down/up to take effect\n"
"\t Note that LRO may be turned off/on via ifconfig\n");
+ SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "sp_log_index", CTLFLAG_RW, &ha->hw.sp_log_index,
+ ha->hw.sp_log_index, "sp_log_index");
+
+ SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "sp_log_stop", CTLFLAG_RW, &ha->hw.sp_log_stop,
+ ha->hw.sp_log_stop, "sp_log_stop");
+
+ ha->hw.sp_log_stop_events = 0;
+
+ SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "sp_log_stop_events", CTLFLAG_RW,
+ &ha->hw.sp_log_stop_events,
+ ha->hw.sp_log_stop_events, "Slow path event log is stopped"
+ " when OR of the following events occur \n"
+ "\t 0x01 : Heart beat Failure\n"
+ "\t 0x02 : Temperature Failure\n"
+ "\t 0x04 : HW Initialization Failure\n"
+ "\t 0x08 : Interface Initialization Failure\n"
+ "\t 0x10 : Error Recovery Failure\n");
+
ha->hw.mdump_active = 0;
SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
@@ -998,13 +1129,13 @@ ql_hw_link_status(qla_host_t *ha)
device_printf(ha->pci_dev, "link Down\n");
}
- if (ha->hw.flags.fduplex) {
+ if (ha->hw.fduplex) {
device_printf(ha->pci_dev, "Full Duplex\n");
} else {
device_printf(ha->pci_dev, "Half Duplex\n");
}
- if (ha->hw.flags.autoneg) {
+ if (ha->hw.autoneg) {
device_printf(ha->pci_dev, "Auto Negotiation Enabled\n");
} else {
device_printf(ha->pci_dev, "Auto Negotiation Disabled\n");
@@ -1255,19 +1386,39 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
uint32_t i;
uint32_t data;
int ret = 0;
+ uint64_t start_usecs;
+ uint64_t end_usecs;
+ uint64_t msecs_200;
- if (QL_ERR_INJECT(ha, INJCT_MBX_CMD_FAILURE)) {
+ ql_sp_log(ha, 0, 5, no_pause, h_mbox[0], h_mbox[1], h_mbox[2], h_mbox[3]);
+
+ if (ha->offline || ha->qla_initiate_recovery) {
+ ql_sp_log(ha, 1, 2, ha->offline, ha->qla_initiate_recovery, 0, 0, 0);
+ goto exit_qla_mbx_cmd;
+ }
+
+ if (((ha->err_inject & 0xFFFF) == INJCT_MBX_CMD_FAILURE) &&
+ (((ha->err_inject & ~0xFFFF) == ((h_mbox[0] & 0xFFFF) << 16))||
+ !(ha->err_inject & ~0xFFFF))) {
ret = -3;
- ha->qla_initiate_recovery = 1;
+ QL_INITIATE_RECOVERY(ha);
goto exit_qla_mbx_cmd;
}
+ start_usecs = qla_get_usec_timestamp();
+
if (no_pause)
i = 1000;
else
i = Q8_MBX_MSEC_DELAY;
while (i) {
+
+ if (ha->qla_initiate_recovery) {
+ ql_sp_log(ha, 2, 1, ha->qla_initiate_recovery, 0, 0, 0, 0);
+ return (-1);
+ }
+
data = READ_REG32(ha, Q8_HOST_MBOX_CNTRL);
if (data == 0)
break;
@@ -1282,8 +1433,10 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
if (i == 0) {
device_printf(ha->pci_dev, "%s: host_mbx_cntrl 0x%08x\n",
__func__, data);
+ ql_sp_log(ha, 3, 1, data, 0, 0, 0, 0);
ret = -1;
- ha->qla_initiate_recovery = 1;
+ ha->hw.mbx_comp_msecs[(Q8_MBX_COMP_MSECS - 2)]++;
+ QL_INITIATE_RECOVERY(ha);
goto exit_qla_mbx_cmd;
}
@@ -1297,6 +1450,12 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
i = Q8_MBX_MSEC_DELAY;
while (i) {
+
+ if (ha->qla_initiate_recovery) {
+ ql_sp_log(ha, 4, 1, ha->qla_initiate_recovery, 0, 0, 0, 0);
+ return (-1);
+ }
+
data = READ_REG32(ha, Q8_FW_MBOX_CNTRL);
if ((data & 0x3) == 1) {
@@ -1314,18 +1473,44 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
if (i == 0) {
device_printf(ha->pci_dev, "%s: fw_mbx_cntrl 0x%08x\n",
__func__, data);
+ ql_sp_log(ha, 5, 1, data, 0, 0, 0, 0);
ret = -2;
- ha->qla_initiate_recovery = 1;
+ ha->hw.mbx_comp_msecs[(Q8_MBX_COMP_MSECS - 1)]++;
+ QL_INITIATE_RECOVERY(ha);
goto exit_qla_mbx_cmd;
}
for (i = 0; i < n_fwmbox; i++) {
+
+ if (ha->qla_initiate_recovery) {
+ ql_sp_log(ha, 6, 1, ha->qla_initiate_recovery, 0, 0, 0, 0);
+ return (-1);
+ }
+
*fw_mbox++ = READ_REG32(ha, (Q8_FW_MBOX0 + (i << 2)));
}
WRITE_REG32(ha, Q8_FW_MBOX_CNTRL, 0x0);
WRITE_REG32(ha, ha->hw.mbx_intr_mask_offset, 0x0);
+ end_usecs = qla_get_usec_timestamp();
+
+ if (end_usecs > start_usecs) {
+ msecs_200 = (end_usecs - start_usecs)/(1000 * 200);
+
+ if (msecs_200 < 15)
+ ha->hw.mbx_comp_msecs[msecs_200]++;
+ else if (msecs_200 < 20)
+ ha->hw.mbx_comp_msecs[15]++;
+ else {
+ device_printf(ha->pci_dev, "%s: [%ld, %ld] %ld\n", __func__,
+ start_usecs, end_usecs, msecs_200);
+ ha->hw.mbx_comp_msecs[16]++;
+ }
+ }
+ ql_sp_log(ha, 7, 5, fw_mbox[0], fw_mbox[1], fw_mbox[2], fw_mbox[3], fw_mbox[4]);
+
+
exit_qla_mbx_cmd:
return (ret);
}
@@ -1401,7 +1586,8 @@ qla_config_intr_cntxt(qla_host_t *ha, uint32_t start_i
if (qla_mbx_cmd(ha, (uint32_t *)c_intr,
(sizeof (q80_config_intr_t) >> 2),
ha->hw.mbox, (sizeof (q80_config_intr_rsp_t) >> 2), 0)) {
- device_printf(dev, "%s: failed0\n", __func__);
+ device_printf(dev, "%s: %s failed0\n", __func__,
+ (create ? "create" : "delete"));
return (-1);
}
@@ -1410,8 +1596,8 @@ qla_config_intr_cntxt(qla_host_t *ha, uint32_t start_i
err = Q8_MBX_RSP_STATUS(c_intr_rsp->regcnt_status);
if (err) {
- device_printf(dev, "%s: failed1 [0x%08x, %d]\n", __func__, err,
- c_intr_rsp->nentries);
+ device_printf(dev, "%s: %s failed1 [0x%08x, %d]\n", __func__,
+ (create ? "create" : "delete"), err, c_intr_rsp->nentries);
for (i = 0; i < c_intr_rsp->nentries; i++) {
device_printf(dev, "%s: [%d]:[0x%x 0x%x 0x%x]\n",
@@ -2015,7 +2201,8 @@ ql_get_stats(qla_host_t *ha)
cmd |= ((ha->pci_func & 0x1) << 16);
- if (ha->qla_watchdog_pause)
+ if (ha->qla_watchdog_pause || (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
+ ha->offline)
goto ql_get_stats_exit;
if (qla_get_hw_stats(ha, cmd, sizeof (q80_get_stats_rsp_t)) == 0) {
@@ -2032,7 +2219,8 @@ ql_get_stats(qla_host_t *ha)
// cmd |= Q8_GET_STATS_CMD_CLEAR;
cmd |= (ha->hw.rcv_cntxt_id << 16);
- if (ha->qla_watchdog_pause)
+ if (ha->qla_watchdog_pause || (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
+ ha->offline)
goto ql_get_stats_exit;
if (qla_get_hw_stats(ha, cmd, sizeof (q80_get_stats_rsp_t)) == 0) {
@@ -2043,13 +2231,18 @@ ql_get_stats(qla_host_t *ha)
__func__, ha->hw.mbox[0]);
}
- if (ha->qla_watchdog_pause)
+ if (ha->qla_watchdog_pause || (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
+ ha->offline)
goto ql_get_stats_exit;
/*
* Get XMT Statistics
*/
- for (i = 0 ; ((i < ha->hw.num_tx_rings) && (!ha->qla_watchdog_pause));
- i++) {
+ for (i = 0 ; (i < ha->hw.num_tx_rings); i++) {
+ if (ha->qla_watchdog_pause ||
+ (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
+ ha->offline)
+ goto ql_get_stats_exit;
+
cmd = Q8_GET_STATS_CMD_XMT | Q8_GET_STATS_CMD_TYPE_CNTXT;
// cmd |= Q8_GET_STATS_CMD_CLEAR;
cmd |= (ha->hw.tx_cntxt[i].tx_cntxt_id << 16);
@@ -2679,7 +2872,8 @@ ql_del_hw_if(qla_host_t *ha)
qla_del_rcv_cntxt(ha);
- qla_del_xmt_cntxt(ha);
+ if(qla_del_xmt_cntxt(ha))
+ goto ql_del_hw_if_exit;
if (ha->hw.flags.init_intr_cnxt) {
for (i = 0; i < ha->hw.num_sds_rings; ) {
@@ -2688,14 +2882,17 @@ ql_del_hw_if(qla_host_t *ha)
num_msix = Q8_MAX_INTR_VECTORS;
else
num_msix = ha->hw.num_sds_rings - i;
- qla_config_intr_cntxt(ha, i, num_msix, 0);
+ if (qla_config_intr_cntxt(ha, i, num_msix, 0))
+ break;
+
i += num_msix;
}
ha->hw.flags.init_intr_cnxt = 0;
}
+ql_del_hw_if_exit:
if (ha->hw.enable_soft_lro) {
qla_drain_soft_lro(ha);
qla_free_soft_lro(ha);
@@ -3328,19 +3525,22 @@ qla_del_xmt_cntxt_i(qla_host_t *ha, uint32_t txr_idx)
return (0);
}
-static void
+static int
qla_del_xmt_cntxt(qla_host_t *ha)
{
uint32_t i;
+ int ret = 0;
if (!ha->hw.flags.init_tx_cnxt)
- return;
+ return (ret);
for (i = 0; i < ha->hw.num_tx_rings; i++) {
- if (qla_del_xmt_cntxt_i(ha, i))
+ if ((ret = qla_del_xmt_cntxt_i(ha, i)) != 0)
break;
}
ha->hw.flags.init_tx_cnxt = 0;
+
+ return (ret);
}
static int
@@ -3350,8 +3550,10 @@ qla_init_xmt_cntxt(qla_host_t *ha)
for (i = 0; i < ha->hw.num_tx_rings; i++) {
if (qla_init_xmt_cntxt_i(ha, i) != 0) {
- for (j = 0; j < i; j++)
- qla_del_xmt_cntxt_i(ha, j);
+ for (j = 0; j < i; j++) {
+ if (qla_del_xmt_cntxt_i(ha, j))
+ break;
+ }
return (-1);
}
}
@@ -3627,22 +3829,23 @@ ql_hw_tx_done_locked(qla_host_t *ha, uint32_t txr_idx)
void
ql_update_link_state(qla_host_t *ha)
{
- uint32_t link_state;
+ uint32_t link_state = 0;
uint32_t prev_link_state;
- if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- ha->hw.link_up = 0;
- return;
- }
- link_state = READ_REG32(ha, Q8_LINK_STATE);
-
prev_link_state = ha->hw.link_up;
- if (ha->pci_func == 0)
- ha->hw.link_up = (((link_state & 0xF) == 1)? 1 : 0);
- else
- ha->hw.link_up = ((((link_state >> 4)& 0xF) == 1)? 1 : 0);
+ if (ha->ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ link_state = READ_REG32(ha, Q8_LINK_STATE);
+ if (ha->pci_func == 0) {
+ link_state = (((link_state & 0xF) == 1)? 1 : 0);
+ } else {
+ link_state = ((((link_state >> 4)& 0xF) == 1)? 1 : 0);
+ }
+ }
+
+ atomic_store_rel_8(&ha->hw.link_up, (uint8_t)link_state);
+
if (prev_link_state != ha->hw.link_up) {
if (ha->hw.link_up) {
if_link_state_change(ha->ifp, LINK_STATE_UP);
@@ -3669,8 +3872,14 @@ ql_hw_check_health(qla_host_t *ha)
if (((val & 0xFFFF) == 2) || ((val & 0xFFFF) == 3) ||
(QL_ERR_INJECT(ha, INJCT_TEMPERATURE_FAILURE))) {
- device_printf(ha->pci_dev, "%s: Temperature Alert [0x%08x]\n",
- __func__, val);
+ device_printf(ha->pci_dev, "%s: Temperature Alert"
+ " at ts_usecs %ld ts_reg = 0x%08x\n",
+ __func__, qla_get_usec_timestamp(), val);
+
+ if (ha->hw.sp_log_stop_events & Q8_SP_LOG_STOP_TEMP_FAILURE)
+ ha->hw.sp_log_stop = -1;
+
+ QL_INITIATE_RECOVERY(ha);
return -1;
}
@@ -3691,10 +3900,26 @@ ql_hw_check_health(qla_host_t *ha)
__func__, val);
if (ha->hw.hbeat_failure < 2) /* we ignore the first failure */
return 0;
- else
- device_printf(ha->pci_dev, "%s: Heartbeat Failue [0x%08x]\n",
- __func__, val);
+ else {
+ uint32_t peg_halt_status1;
+ uint32_t peg_halt_status2;
+ peg_halt_status1 = READ_REG32(ha, Q8_PEG_HALT_STATUS1);
+ peg_halt_status2 = READ_REG32(ha, Q8_PEG_HALT_STATUS2);
+
+ device_printf(ha->pci_dev,
+ "%s: Heartbeat Failue at ts_usecs = %ld "
+ "fw_heart_beat = 0x%08x "
+ "peg_halt_status1 = 0x%08x "
+ "peg_halt_status2 = 0x%08x\n",
+ __func__, qla_get_usec_timestamp(), val,
+ peg_halt_status1, peg_halt_status2);
+
+ if (ha->hw.sp_log_stop_events & Q8_SP_LOG_STOP_HBEAT_FAILURE)
+ ha->hw.sp_log_stop = -1;
+ }
+ QL_INITIATE_RECOVERY(ha);
+
return -1;
}
@@ -4429,8 +4654,8 @@ ql_minidump(qla_host_t *ha)
if (ha->hw.mdump_done)
return;
-
- ha->hw.mdump_start_seq_index = ql_stop_sequence(ha);
+ ha->hw.mdump_usec_ts = qla_get_usec_timestamp();
+ ha->hw.mdump_start_seq_index = ql_stop_sequence(ha);
bzero(ha->hw.mdump_buffer, ha->hw.mdump_buffer_size);
bzero(ha->hw.mdump_template, ha->hw.mdump_template_size);
Modified: stable/9/sys/dev/qlxgbe/ql_hw.h
==============================================================================
--- stable/9/sys/dev/qlxgbe/ql_hw.h Tue Mar 6 23:17:56 2018 (r330556)
+++ stable/9/sys/dev/qlxgbe/ql_hw.h Tue Mar 6 23:28:12 2018 (r330557)
@@ -1600,26 +1600,26 @@ typedef struct _qla_hw {
uint32_t
unicast_mac :1,
bcast_mac :1,
- loopback_mode :2,
init_tx_cnxt :1,
init_rx_cnxt :1,
init_intr_cnxt :1,
- fduplex :1,
- autoneg :1,
fdt_valid :1;
} flags;
- uint16_t link_speed;
- uint16_t cable_length;
- uint32_t cable_oui;
- uint8_t link_up;
- uint8_t module_type;
- uint8_t link_faults;
+ volatile uint16_t link_speed;
+ volatile uint16_t cable_length;
+ volatile uint32_t cable_oui;
+ volatile uint8_t link_up;
+ volatile uint8_t module_type;
+ volatile uint8_t link_faults;
+ volatile uint8_t loopback_mode;
+ volatile uint8_t fduplex;
+ volatile uint8_t autoneg;
- uint8_t mac_rcv_mode;
+ volatile uint8_t mac_rcv_mode;
- uint32_t max_mtu;
+ volatile uint32_t max_mtu;
uint8_t mac_addr[ETHER_ADDR_LEN];
@@ -1703,9 +1703,25 @@ typedef struct _qla_hw {
uint32_t mdump_buffer_size;
void *mdump_template;
uint32_t mdump_template_size;
+ uint64_t mdump_usec_ts;
+#define Q8_MBX_COMP_MSECS (19)
+ uint64_t mbx_comp_msecs[Q8_MBX_COMP_MSECS];
/* driver state related */
void *drvr_state;
+
+ /* slow path trace */
+ uint32_t sp_log_stop_events;
+#define Q8_SP_LOG_STOP_HBEAT_FAILURE 0x001
+#define Q8_SP_LOG_STOP_TEMP_FAILURE 0x002
+#define Q8_SP_LOG_STOP_HW_INIT_FAILURE 0x004
+#define Q8_SP_LOG_STOP_IF_START_FAILURE 0x008
+#define Q8_SP_LOG_STOP_ERR_RECOVERY_FAILURE 0x010
+
+ uint32_t sp_log_stop;
+ uint32_t sp_log_index;
+ uint32_t sp_log_num_entries;
+ void *sp_log;
} qla_hw_t;
#define QL_UPDATE_RDS_PRODUCER_INDEX(ha, prod_reg, val) \
Modified: stable/9/sys/dev/qlxgbe/ql_inline.h
==============================================================================
--- stable/9/sys/dev/qlxgbe/ql_inline.h Tue Mar 6 23:17:56 2018 (r330556)
+++ stable/9/sys/dev/qlxgbe/ql_inline.h Tue Mar 6 23:28:12 2018 (r330557)
@@ -166,7 +166,7 @@ qla_lock(qla_host_t *ha, const char *str, uint32_t tim
while (1) {
mtx_lock(&ha->hw_lock);
- if (ha->qla_detach_active) {
+ if (ha->qla_detach_active || ha->offline) {
mtx_unlock(&ha->hw_lock);
break;
}
@@ -191,7 +191,10 @@ qla_lock(qla_host_t *ha, const char *str, uint32_t tim
}
}
- //device_printf(ha->pci_dev, "%s: %s ret = %d\n", __func__, str,ret);
+// if (!ha->enable_error_recovery)
+// device_printf(ha->pci_dev, "%s: %s ret = %d\n", __func__,
+// str,ret);
+
return (ret);
}
@@ -202,7 +205,9 @@ qla_unlock(qla_host_t *ha, const char *str)
ha->hw_lock_held = 0;
ha->qla_unlock = str;
mtx_unlock(&ha->hw_lock);
- //device_printf(ha->pci_dev, "%s: %s\n", __func__, str);
+
+// if (!ha->enable_error_recovery)
+// device_printf(ha->pci_dev, "%s: %s\n", __func__, str);
return;
}
Modified: stable/9/sys/dev/qlxgbe/ql_ioctl.c
==============================================================================
--- stable/9/sys/dev/qlxgbe/ql_ioctl.c Tue Mar 6 23:17:56 2018 (r330556)
+++ stable/9/sys/dev/qlxgbe/ql_ioctl.c Tue Mar 6 23:28:12 2018 (r330557)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "ql_ver.h"
#include "ql_dbg.h"
+static int ql_slowpath_log(qla_host_t *ha, qla_sp_log_t *log);
static int ql_drvr_state(qla_host_t *ha, qla_driver_state_t *drvr_state);
static uint32_t ql_drvr_state_size(qla_host_t *ha);
static int ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
@@ -224,6 +225,7 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data,
case QLA_RD_FW_DUMP:
if (ha->hw.mdump_init == 0) {
+ device_printf(pci_dev, "%s: minidump not initialized\n", __func__);
rval = EINVAL;
break;
}
@@ -233,45 +235,85 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data,
if ((fw_dump->minidump == NULL) ||
(fw_dump->minidump_size != (ha->hw.mdump_buffer_size +
ha->hw.mdump_template_size))) {
+ device_printf(pci_dev,
+ "%s: minidump buffer [%p] size = [%d, %d] invalid\n", __func__,
+ fw_dump->minidump, fw_dump->minidump_size,
+ (ha->hw.mdump_buffer_size + ha->hw.mdump_template_size));
rval = EINVAL;
break;
}
- if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
- if (!ha->hw.mdump_done)
- ha->qla_initiate_recovery = 1;
- QLA_UNLOCK(ha, __func__);
- } else {
+ if ((ha->pci_func & 0x1)) {
+ device_printf(pci_dev, "%s: mindump allowed only on Port0\n", __func__);
rval = ENXIO;
break;
}
+
+ fw_dump->saved = 1;
+
+ if (ha->offline) {
+
+ if (ha->enable_minidump)
+ ql_minidump(ha);
+
+ fw_dump->saved = 0;
+ fw_dump->usec_ts = ha->hw.mdump_usec_ts;
+
+ if (!ha->hw.mdump_done) {
+ device_printf(pci_dev,
+ "%s: port offline minidump failed\n", __func__);
+ rval = ENXIO;
+ break;
+ }
+ } else {
+
+ if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
+ if (!ha->hw.mdump_done) {
+ fw_dump->saved = 0;
+ QL_INITIATE_RECOVERY(ha);
+ device_printf(pci_dev, "%s: recovery initiated "
+ " to trigger minidump\n",
+ __func__);
+ }
+ QLA_UNLOCK(ha, __func__);
+ } else {
+ device_printf(pci_dev, "%s: QLA_LOCK() failed0\n", __func__);
+ rval = ENXIO;
+ break;
+ }
#define QLNX_DUMP_WAIT_SECS 30
- count = QLNX_DUMP_WAIT_SECS * 1000;
+ count = QLNX_DUMP_WAIT_SECS * 1000;
- while (count) {
- if (ha->hw.mdump_done)
- break;
- qla_mdelay(__func__, 100);
- count -= 100;
- }
+ while (count) {
+ if (ha->hw.mdump_done)
+ break;
+ qla_mdelay(__func__, 100);
+ count -= 100;
+ }
- if (!ha->hw.mdump_done) {
- rval = ENXIO;
- break;
- }
+ if (!ha->hw.mdump_done) {
+ device_printf(pci_dev,
+ "%s: port not offline minidump failed\n", __func__);
+ rval = ENXIO;
+ break;
+ }
+ fw_dump->usec_ts = ha->hw.mdump_usec_ts;
- if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
- ha->hw.mdump_done = 0;
- QLA_UNLOCK(ha, __func__);
- } else {
- rval = ENXIO;
- break;
+ if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
+ ha->hw.mdump_done = 0;
+ QLA_UNLOCK(ha, __func__);
+ } else {
+ device_printf(pci_dev, "%s: QLA_LOCK() failed1\n", __func__);
+ rval = ENXIO;
+ break;
+ }
}
if ((rval = copyout(ha->hw.mdump_template,
fw_dump->minidump, ha->hw.mdump_template_size))) {
+ device_printf(pci_dev, "%s: template copyout failed\n", __func__);
rval = ENXIO;
break;
}
@@ -279,14 +321,20 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data,
if ((rval = copyout(ha->hw.mdump_buffer,
((uint8_t *)fw_dump->minidump +
ha->hw.mdump_template_size),
- ha->hw.mdump_buffer_size)))
+ ha->hw.mdump_buffer_size))) {
+ device_printf(pci_dev, "%s: minidump copyout failed\n", __func__);
rval = ENXIO;
+ }
break;
case QLA_RD_DRVR_STATE:
rval = ql_drvr_state(ha, (qla_driver_state_t *)data);
break;
+ case QLA_RD_SLOWPATH_LOG:
+ rval = ql_slowpath_log(ha, (qla_sp_log_t *)data);
+ break;
+
case QLA_RD_PCI_IDS:
pci_ids = (qla_rd_pci_ids_t *)data;
pci_ids->ven_id = pci_get_vendor(pci_dev);
@@ -304,12 +352,12 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data,
}
+
static int
ql_drvr_state(qla_host_t *ha, qla_driver_state_t *state)
{
int rval = 0;
uint32_t drvr_state_size;
- qla_drvr_state_hdr_t *hdr;
drvr_state_size = ql_drvr_state_size(ha);
@@ -324,11 +372,8 @@ ql_drvr_state(qla_host_t *ha, qla_driver_state_t *stat
if (ha->hw.drvr_state == NULL)
return (ENOMEM);
- hdr = ha->hw.drvr_state;
+ ql_capture_drvr_state(ha);
- if (!hdr->drvr_version_major)
- ql_capture_drvr_state(ha);
-
rval = copyout(ha->hw.drvr_state, state->buffer, drvr_state_size);
bzero(ha->hw.drvr_state, drvr_state_size);
@@ -416,22 +461,26 @@ ql_capture_drvr_state(qla_host_t *ha)
{
uint8_t *state_buffer;
uint8_t *ptr;
- uint32_t drvr_state_size;
qla_drvr_state_hdr_t *hdr;
uint32_t size;
int i;
- drvr_state_size = ql_drvr_state_size(ha);
-
state_buffer = ha->hw.drvr_state;
if (state_buffer == NULL)
return;
-
- bzero(state_buffer, drvr_state_size);
hdr = (qla_drvr_state_hdr_t *)state_buffer;
+
+ hdr->saved = 0;
+ if (hdr->drvr_version_major) {
+ hdr->saved = 1;
+ return;
+ }
+
+ hdr->usec_ts = qla_get_usec_timestamp();
+
hdr->drvr_version_major = QLA_VERSION_MAJOR;
hdr->drvr_version_minor = QLA_VERSION_MINOR;
hdr->drvr_version_build = QLA_VERSION_BUILD;
@@ -512,6 +561,9 @@ ql_alloc_drvr_state_buffer(qla_host_t *ha)
ha->hw.drvr_state = malloc(drvr_state_size, M_QLA83XXBUF, M_NOWAIT);
+ if (ha->hw.drvr_state != NULL)
+ bzero(ha->hw.drvr_state, drvr_state_size);
+
return;
}
@@ -521,5 +573,95 @@ ql_free_drvr_state_buffer(qla_host_t *ha)
if (ha->hw.drvr_state != NULL)
free(ha->hw.drvr_state, M_QLA83XXBUF);
return;
+}
+
+void
+ql_sp_log(qla_host_t *ha, uint16_t fmtstr_idx, uint16_t num_params,
+ uint32_t param0, uint32_t param1, uint32_t param2, uint32_t param3,
+ uint32_t param4)
+{
+ qla_sp_log_entry_t *sp_e, *sp_log;
+
+ if (((sp_log = ha->hw.sp_log) == NULL) || ha->hw.sp_log_stop)
+ return;
+
+ mtx_lock(&ha->sp_log_lock);
+
+ sp_e = &sp_log[ha->hw.sp_log_index];
+
+ bzero(sp_e, sizeof (qla_sp_log_entry_t));
+
+ sp_e->fmtstr_idx = fmtstr_idx;
+ sp_e->num_params = num_params;
+
+ sp_e->usec_ts = qla_get_usec_timestamp();
+
+ sp_e->params[0] = param0;
+ sp_e->params[1] = param1;
+ sp_e->params[2] = param2;
+ sp_e->params[3] = param3;
+ sp_e->params[4] = param4;
+
+ ha->hw.sp_log_index = (ha->hw.sp_log_index + 1) & (NUM_LOG_ENTRIES - 1);
+
+ if (ha->hw.sp_log_num_entries < NUM_LOG_ENTRIES)
+ ha->hw.sp_log_num_entries++;
+
+ mtx_unlock(&ha->sp_log_lock);
+
+ return;
+}
+
+void
+ql_alloc_sp_log_buffer(qla_host_t *ha)
+{
+ uint32_t size;
+
+ size = (sizeof(qla_sp_log_entry_t)) * NUM_LOG_ENTRIES;
+
+ ha->hw.sp_log = malloc(size, M_QLA83XXBUF, M_NOWAIT);
+
+ if (ha->hw.sp_log != NULL)
+ bzero(ha->hw.sp_log, size);
+
+ ha->hw.sp_log_index = 0;
+ ha->hw.sp_log_num_entries = 0;
+
+ return;
+}
+
+void
+ql_free_sp_log_buffer(qla_host_t *ha)
+{
+ if (ha->hw.sp_log != NULL)
+ free(ha->hw.sp_log, M_QLA83XXBUF);
+ return;
+}
+
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-9
mailing list