svn commit: r218552 -
projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib
Jeff Roberson
jeff at FreeBSD.org
Fri Feb 11 11:22:14 UTC 2011
Author: jeff
Date: Fri Feb 11 11:22:14 2011
New Revision: 218552
URL: http://svn.freebsd.org/changeset/base/218552
Log:
- Eliminate zero-length mbufs when loading the tx descriptor. These cause
the driver to hang. For unknown reasons, the stack creates them when
making IP fragments.
- Make it safe to poll tx completions without the device lock held. This
significantly improves TCP performance.
- I had erroneously set the MTU based on the receive size, which includes
the GRH. Correct this.
Modified:
projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h Fri Feb 11 10:50:33 2011 (r218551)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h Fri Feb 11 11:22:14 2011 (r218552)
@@ -91,7 +91,7 @@
/* constants */
#define INFINIBAND_ALEN 20 /* Octets in IPoIB HW addr */
-#define MAX_MB_FRAGS (8192 / MCLBYTES)
+#define MAX_MB_FRAGS ((8192 / MCLBYTES) + 2)
#ifdef IPOIB_CM
#define CONFIG_INFINIBAND_IPOIB_CM
@@ -99,6 +99,7 @@
#ifdef IPOIB_DEBUG
#define CONFIG_INFINIBAND_IPOIB_DEBUG
+#define CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
#endif
enum ipoib_flush_level {
@@ -110,7 +111,6 @@ enum ipoib_flush_level {
enum {
IPOIB_ENCAP_LEN = 4,
IPOIB_HEADER_LEN = IPOIB_ENCAP_LEN + INFINIBAND_ALEN,
- IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
IPOIB_UD_RX_SG = 1, /* max buffer needed for 4K mtu */
IPOIB_CM_MAX_MTU = MJUM16BYTES,
@@ -286,7 +286,6 @@ struct ipoib_cm_dev_priv {
struct ifqueue mb_queue;
struct list_head start_list;
struct list_head reap_list;
- struct ib_wc ibwc[IPOIB_NUM_WC];
struct ib_sge rx_sge[IPOIB_CM_RX_SG];
struct ib_recv_wr rx_wr;
int nonsrq_conn_qp;
@@ -414,7 +413,7 @@ struct ipoib_path {
};
/* UD Only transmits encap len but we want the two sizes to be symmetrical. */
-#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IB_GRH_BYTES)
+#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
#define IPOIB_CM_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
#define IPOIB_IS_MULTICAST(addr) ((addr)[4] == 0xff)
@@ -519,6 +518,8 @@ void ipoib_drain_cq(struct ipoib_dev_pri
int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
void ipoib_dma_unmap_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
+int ipoib_poll_tx(struct ipoib_dev_priv *priv);
+
void ipoib_set_ethtool_ops(struct ifnet *dev);
int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c Fri Feb 11 10:50:33 2011 (r218551)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c Fri Feb 11 11:22:14 2011 (r218552)
@@ -94,6 +94,7 @@ static int ipoib_cm_post_receive_srq(str
priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
priv->cm.rx_sge[0].addr = priv->cm.srq_ring[id].mapping[0];
+ priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu;
ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
if (unlikely(ret)) {
@@ -117,6 +118,7 @@ static int ipoib_cm_post_receive_nonsrq(
wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
sge[0].addr = rx->rx_ring[id].mapping[0];
+ priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu;
ret = ib_post_recv(rx->qp, wr, &bad_wr);
if (unlikely(ret)) {
@@ -505,11 +507,13 @@ void ipoib_cm_handle_rx_wc(struct ipoib_
if (unlikely(wr_id >= ipoib_recvq_size)) {
if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
+ spin_lock(&priv->lock);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
if (priv->cm.id != NULL)
queue_work(ipoib_workqueue,
&priv->cm.rx_reap_task);
+ spin_unlock(&priv->lock);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
wr_id, ipoib_recvq_size);
@@ -532,8 +536,10 @@ void ipoib_cm_handle_rx_wc(struct ipoib_
goto repost;
else {
if (!--p->recv_count) {
+ spin_lock(&priv->lock);
list_move(&p->list, &priv->cm.rx_reap_list);
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ spin_unlock(&priv->lock);
}
return;
}
@@ -574,13 +580,9 @@ void ipoib_cm_handle_rx_wc(struct ipoib_
mb->m_pkthdr.rcvif = dev;
proto = *mtod(mb, uint16_t *);
m_adj(mb, IPOIB_ENCAP_LEN);
- if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
- mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;
IPOIB_MTAP_PROTO(dev, mb, proto);
- spin_unlock(&priv->lock);
ipoib_demux(dev, mb, ntohs(proto));
- spin_lock(&priv->lock);
repost:
if (has_srq) {
@@ -626,8 +628,11 @@ void ipoib_cm_send(struct ipoib_dev_priv
struct ipoib_tx_buf *tx_req;
struct ifnet *dev = priv->dev;
+ if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+ while (ipoib_poll_tx(priv)); /* nothing */
+
m_adj(mb, sizeof(struct ipoib_pseudoheader));
- if (unlikely(mb->m_pkthdr.len > IPOIB_CM_MTU(tx->mtu))) {
+ if (unlikely(mb->m_pkthdr.len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
mb->m_pkthdr.len, tx->mtu);
++dev->if_oerrors;
@@ -655,11 +660,6 @@ void ipoib_cm_send(struct ipoib_dev_priv
return;
}
- if (mb->m_pkthdr.csum_flags & (CSUM_IP|CSUM_TCP|CSUM_UDP))
- priv->tx_wr.send_flags |= IB_SEND_IP_CSUM;
- else
- priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
-
if (unlikely(post_send(priv, tx, tx_req, tx->tx_head & (ipoib_sendq_size - 1)))) {
ipoib_warn(priv, "post_send failed\n");
++dev->if_oerrors;
@@ -676,6 +676,7 @@ void ipoib_cm_send(struct ipoib_dev_priv
dev->if_drv_flags |= IFF_DRV_OACTIVE;
}
}
+
}
void ipoib_cm_handle_tx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
@@ -936,7 +937,7 @@ static struct ib_qp *ipoib_cm_create_tx_
struct ipoib_cm_tx *tx)
{
struct ib_qp_init_attr attr = {
- .send_cq = priv->recv_cq,
+ .send_cq = priv->send_cq,
.recv_cq = priv->recv_cq,
.srq = priv->cm.srq,
.cap.max_send_wr = ipoib_sendq_size,
Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c Fri Feb 11 10:50:33 2011 (r218551)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c Fri Feb 11 11:22:14 2011 (r218552)
@@ -90,8 +90,8 @@ void ipoib_free_ah(struct kref *kref)
static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
u64 mapping[IPOIB_UD_RX_SG])
{
- ib_dma_unmap_single(priv->ca, mapping[0], priv->max_ib_mtu,
- DMA_FROM_DEVICE);
+ ib_dma_unmap_single(priv->ca, mapping[0],
+ priv->max_ib_mtu + IB_GRH_BYTES, DMA_FROM_DEVICE);
}
static void ipoib_ud_mb_put_frags(struct ipoib_dev_priv *priv,
@@ -110,6 +110,8 @@ static int ipoib_ib_post_receive(struct
priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
+ priv->rx_sge[0].length = priv->max_ib_mtu + IB_GRH_BYTES;
+
ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
if (unlikely(ret)) {
@@ -131,7 +133,7 @@ static struct mbuf *ipoib_alloc_rx_mb(st
/*
* XXX Should be calculated once and cached.
*/
- buf_size = priv->max_ib_mtu;
+ buf_size = priv->max_ib_mtu + IB_GRH_BYTES;
if (buf_size <= MCLBYTES)
buf_size = MCLBYTES;
else if (buf_size <= MJUMPAGESIZE)
@@ -198,13 +200,18 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
mb = priv->rx_ring[wr_id].mb;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
- if (wc->status != IB_WC_WR_FLUSH_ERR)
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
ipoib_warn(priv, "failed recv event "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
- ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
- m_freem(mb);
- priv->rx_ring[wr_id].mb = NULL;
+ goto repost;
+ }
+ if (mb) {
+ ipoib_ud_dma_unmap_rx(priv,
+ priv->rx_ring[wr_id].mapping);
+ m_freem(mb);
+ priv->rx_ring[wr_id].mb = NULL;
+ }
return;
}
@@ -243,9 +250,7 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;
- spin_unlock(&priv->lock);
dev->if_input(dev, mb);
- spin_lock(&priv->lock);
repost:
if (unlikely(ipoib_ib_post_receive(priv, wr_id)))
@@ -257,11 +262,19 @@ int ipoib_dma_map_tx(struct ib_device *c
{
struct mbuf *mb = tx_req->mb;
u64 *mapping = tx_req->mapping;
- struct mbuf *m;
+ struct mbuf *m, *p;
int error;
int i;
- for (m = mb, i = 0; m != NULL; m = m->m_next, i++);
+ for (m = mb, p = NULL, i = 0; m != NULL; p = m, m = m->m_next, i++) {
+ if (m->m_len != 0)
+ continue;
+ if (p == NULL)
+ panic("ipoib_dma_map_tx: First mbuf empty\n");
+ p->m_next = m_free(m);
+ m = p;
+ i--;
+ }
i--;
if (i >= MAX_MB_FRAGS) {
tx_req->mb = mb = m_defrag(mb, M_DONTWAIT);
@@ -339,13 +352,19 @@ static void ipoib_ib_handle_tx_wc(struct
wc->status, wr_id, wc->vendor_err);
}
-static int poll_tx(struct ipoib_dev_priv *priv)
+int
+ipoib_poll_tx(struct ipoib_dev_priv *priv)
{
int n, i;
n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
- for (i = 0; i < n; ++i)
- ipoib_ib_handle_tx_wc(priv, priv->send_wc + i);
+ for (i = 0; i < n; ++i) {
+ struct ib_wc *wc = priv->send_wc + i;
+ if (wc->wr_id & IPOIB_OP_CM)
+ ipoib_cm_handle_tx_wc(priv, wc);
+ else
+ ipoib_ib_handle_tx_wc(priv, wc);
+ }
return n == MAX_SEND_CQE;
}
@@ -362,13 +381,13 @@ poll_more:
for (i = 0; i < n; i++) {
struct ib_wc *wc = priv->ibwc + i;
- if (wc->wr_id & IPOIB_OP_RECV) {
- if (wc->wr_id & IPOIB_OP_CM)
- ipoib_cm_handle_rx_wc(priv, wc);
- else
- ipoib_ib_handle_rx_wc(priv, wc);
- } else
- ipoib_cm_handle_tx_wc(priv, wc);
+ if ((wc->wr_id & IPOIB_OP_RECV) == 0)
+ panic("ipoib_poll: Bad wr_id 0x%jX\n",
+ (intmax_t)wc->wr_id);
+ if (wc->wr_id & IPOIB_OP_CM)
+ ipoib_cm_handle_rx_wc(priv, wc);
+ else
+ ipoib_ib_handle_rx_wc(priv, wc);
}
if (n != IPOIB_NUM_WC)
@@ -384,9 +403,7 @@ void ipoib_ib_completion(struct ib_cq *c
{
struct ipoib_dev_priv *priv = dev_ptr;
- spin_lock(&priv->lock);
ipoib_poll(priv);
- spin_unlock(&priv->lock);
}
static void drain_tx_cq(struct ipoib_dev_priv *priv)
@@ -394,7 +411,7 @@ static void drain_tx_cq(struct ipoib_dev
struct ifnet *dev = priv->dev;
spin_lock(&priv->lock);
- while (poll_tx(priv))
+ while (ipoib_poll_tx(priv))
; /* nothing */
if (dev->if_drv_flags & IFF_DRV_OACTIVE)
@@ -430,6 +447,7 @@ post_send(struct ipoib_dev_priv *priv, u
priv->tx_wr.wr.ud.remote_qpn = qpn;
priv->tx_wr.wr.ud.ah = address;
+
if (head) {
priv->tx_wr.wr.ud.mss = 0; /* XXX mb_shinfo(mb)->gso_size; */
priv->tx_wr.wr.ud.header = head;
@@ -450,6 +468,10 @@ ipoib_send(struct ipoib_dev_priv *priv,
int hlen;
void *phead;
+ if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+ while (ipoib_poll_tx(priv))
+ ; /* nothing */
+
m_adj(mb, sizeof (struct ipoib_pseudoheader));
if (0 /* XXX segment offload mb_is_gso(mb) */) {
/* XXX hlen = mb_transport_offset(mb) + tcp_hdrlen(mb); */
@@ -462,7 +484,7 @@ ipoib_send(struct ipoib_dev_priv *priv,
}
m_adj(mb, hlen);
} else {
- if (unlikely(mb->m_pkthdr.len > priv->mcast_mtu)) {
+ if (unlikely(mb->m_pkthdr.len - IPOIB_ENCAP_LEN > priv->mcast_mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
mb->m_pkthdr.len, priv->mcast_mtu);
++dev->if_oerrors;
@@ -518,10 +540,6 @@ ipoib_send(struct ipoib_dev_priv *priv,
address->last_send = priv->tx_head;
++priv->tx_head;
}
-
- if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
- while (poll_tx(priv))
- ; /* nothing */
}
static void __ipoib_reap_ah(struct ipoib_dev_priv *priv)
@@ -681,7 +699,6 @@ void ipoib_drain_cq(struct ipoib_dev_pri
{
int i, n;
- spin_lock(&priv->lock);
do {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
@@ -693,17 +710,18 @@ void ipoib_drain_cq(struct ipoib_dev_pri
if (priv->ibwc[i].status == IB_WC_SUCCESS)
priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
- if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
- if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
- ipoib_cm_handle_rx_wc(priv, priv->ibwc + i);
- else
- ipoib_ib_handle_rx_wc(priv, priv->ibwc + i);
- } else
- ipoib_cm_handle_tx_wc(priv, priv->ibwc + i);
+ if ((priv->ibwc[i].wr_id & IPOIB_OP_RECV) == 0)
+ panic("ipoib_drain_cq: Bad wrid 0x%jX\n",
+ (intmax_t)priv->ibwc[i].wr_id);
+ if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
+ ipoib_cm_handle_rx_wc(priv, priv->ibwc + i);
+ else
+ ipoib_ib_handle_rx_wc(priv, priv->ibwc + i);
}
} while (n == IPOIB_NUM_WC);
- while (poll_tx(priv))
+ spin_lock(&priv->lock);
+ while (ipoib_poll_tx(priv))
; /* nothing */
spin_unlock(&priv->lock);
Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c Fri Feb 11 10:50:33 2011 (r218551)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c Fri Feb 11 11:22:14 2011 (r218552)
@@ -596,7 +596,7 @@ path_rec_start(struct ipoib_dev_priv *pr
p_rec = path->pathrec;
p_rec.mtu_selector = IB_SA_GT;
- switch (roundup_pow_of_two(dev->if_mtu + IB_GRH_BYTES)) {
+ switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) {
case 512:
p_rec.mtu = IB_MTU_256;
break;
@@ -923,9 +923,11 @@ ipoib_set_dev_features(struct ipoib_dev_
priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
}
+#if 0
if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->if_capabilities |= IFCAP_TSO4 | CSUM_TSO;
#endif
+#endif
priv->dev->if_capabilities |=
IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
priv->dev->if_capenable = priv->dev->if_capabilities;
Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c Fri Feb 11 10:50:33 2011 (r218551)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c Fri Feb 11 11:22:14 2011 (r218552)
@@ -222,7 +222,6 @@ int ipoib_transport_dev_init(struct ipoi
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
priv->rx_sge[0].lkey = priv->mr->lkey;
- priv->rx_sge[0].length = priv->max_ib_mtu;
priv->rx_wr.num_sge = 1;
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
More information about the svn-src-projects
mailing list