svn commit: r297879 - head/sys/contrib/rdma/krping
Navdeep Parhar
np at FreeBSD.org
Tue Apr 12 21:34:05 UTC 2016
Author: np
Date: Tue Apr 12 21:34:04 2016
New Revision: 297879
URL: https://svnweb.freebsd.org/changeset/base/297879
Log:
Add fastreg support to krping (ported from upstream).
Submitted by: Krishnamraju Eraparaju @ Chelsio
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D5777
Modified:
head/sys/contrib/rdma/krping/krping.c
Modified: head/sys/contrib/rdma/krping/krping.c
==============================================================================
--- head/sys/contrib/rdma/krping/krping.c Tue Apr 12 21:29:06 2016 (r297878)
+++ head/sys/contrib/rdma/krping/krping.c Tue Apr 12 21:34:04 2016 (r297879)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
extern int krping_debug;
#define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x)
#define PRINTF(cb, x...) krping_printf((cb)->cookie, x)
+#define BIND_INFO 1
MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("RDMA ping client/server");
@@ -99,7 +100,7 @@ static const struct krping_option krping
{"poll", OPT_NOPARAM, 'P'},
{"local_dma_lkey", OPT_NOPARAM, 'Z'},
{"read_inv", OPT_NOPARAM, 'R'},
- {"fr", OPT_NOPARAM, 'f'},
+ {"fr", OPT_INT, 'f'},
{NULL, 0, 0}
};
@@ -232,6 +233,7 @@ struct krping_cb {
int txdepth; /* SQ depth */
int local_dma_lkey; /* use 0 for lkey */
int frtest; /* fastreg test */
+ int testnum;
/* CM stuff */
struct rdma_cm_id *cm_id; /* connection on client side,*/
@@ -365,11 +367,7 @@ static void krping_cq_event_handler(stru
PRINTF(cb, "cq completion in ERROR state\n");
return;
}
- if (cb->frtest) {
- PRINTF(cb, "cq completion event in frtest!\n");
- return;
- }
- if (!cb->wlat && !cb->rlat && !cb->bw)
+ if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest)
ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
if (wc.status) {
@@ -411,7 +409,7 @@ static void krping_cq_event_handler(stru
DEBUG_LOG(cb, "recv completion\n");
cb->stats.recv_bytes += sizeof(cb->recv_buf);
cb->stats.recv_msgs++;
- if (cb->wlat || cb->rlat || cb->bw)
+ if (cb->wlat || cb->rlat || cb->bw || cb->frtest)
ret = server_recv(cb, &wc);
else
ret = cb->server ? server_recv(cb, &wc) :
@@ -464,7 +462,7 @@ static int krping_accept(struct krping_c
return ret;
}
- if (!cb->wlat && !cb->rlat && !cb->bw) {
+ if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
if (cb->state == ERROR) {
PRINTF(cb, "wait for CONNECTED state %d\n",
@@ -502,7 +500,7 @@ static void krping_setup_wr(struct krpin
cb->sq_wr.sg_list = &cb->send_sgl;
cb->sq_wr.num_sge = 1;
- if (cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
cb->rdma_sgl.addr = cb->rdma_dma_addr;
if (cb->mem == MR)
cb->rdma_sgl.lkey = cb->rdma_mr->lkey;
@@ -531,7 +529,11 @@ static void krping_setup_wr(struct krpin
case MW:
cb->bind_attr.wr_id = 0xabbaabba;
cb->bind_attr.send_flags = 0; /* unsignaled */
+#ifdef BIND_INFO
cb->bind_attr.bind_info.length = cb->size;
+#else
+ cb->bind_attr.length = cb->size;
+#endif
break;
default:
break;
@@ -646,7 +648,7 @@ static int krping_setup_buffers(struct k
buf.size = cb->size;
iovbase = cb->rdma_dma_addr;
cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
- IB_ACCESS_LOCAL_WRITE|
+ IB_ACCESS_LOCAL_WRITE|
IB_ACCESS_REMOTE_READ|
IB_ACCESS_REMOTE_WRITE,
&iovbase);
@@ -665,7 +667,7 @@ static int krping_setup_buffers(struct k
}
}
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
cb->start_buf = kmalloc(cb->size, GFP_KERNEL);
if (!cb->start_buf) {
@@ -682,9 +684,9 @@ static int krping_setup_buffers(struct k
if (cb->mem == MR || cb->mem == MW) {
unsigned flags = IB_ACCESS_REMOTE_READ;
- if (cb->wlat || cb->rlat || cb->bw) {
+ if (cb->wlat || cb->rlat || cb->bw || cb->frtest) {
flags |= IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE;
+ IB_ACCESS_REMOTE_WRITE;
}
buf.addr = cb->start_dma_addr;
@@ -907,15 +909,33 @@ static u32 krping_rdma_rkey(struct krpin
* Update the MW with new buf info.
*/
if (buf == (u64)cb->start_dma_addr) {
+#ifdef BIND_INFO
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
cb->bind_attr.bind_info.mr = cb->start_mr;
+#else
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
+ cb->bind_attr.mr = cb->start_mr;
+#endif
} else {
+#ifdef BIND_INFO
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
cb->bind_attr.bind_info.mr = cb->rdma_mr;
+#else
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+ cb->bind_attr.mr = cb->rdma_mr;
+#endif
}
+#ifdef BIND_INFO
cb->bind_attr.bind_info.addr = buf;
+#else
+ cb->bind_attr.addr = buf;
+#endif
DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
+#ifdef BIND_INFO
cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
+#else
+ cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+#endif
ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
if (ret) {
PRINTF(cb, "bind mw error %d\n", ret);
@@ -950,7 +970,7 @@ static void krping_format_send(struct kr
* advertising the rdma buffer. Server side
* sends have no data.
*/
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
info->buf = htonll(buf);
info->rkey = htonl(rkey);
@@ -980,7 +1000,6 @@ static void krping_test_server(struct kr
cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
cb->rdma_sq_wr.sg_list->length = cb->remote_len;
cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1);
- cb->rdma_sq_wr.next = NULL;
/* Issue RDMA Read. */
if (cb->read_inv)
@@ -1484,7 +1503,6 @@ static void krping_rlat_test_server(stru
PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
-
wait_event_interruptible(cb->sem, cb->state == ERROR);
}
@@ -1557,9 +1575,10 @@ static void krping_bw_test_server(struct
wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static int fastreg_supported(struct krping_cb *cb)
+static int fastreg_supported(struct krping_cb *cb, int server)
{
- struct ib_device *dev = cb->child_cm_id->device;
+ struct ib_device *dev = server?cb->child_cm_id->device:
+ cb->cm_id->device;
struct ib_device_attr attr;
int ret;
@@ -1610,158 +1629,259 @@ static int krping_bind_server(struct krp
return -1;
}
- if (cb->mem == FASTREG && !fastreg_supported(cb))
+ if (cb->mem == FASTREG && !fastreg_supported(cb, 1))
return -EINVAL;
return 0;
}
-static void krping_run_server(struct krping_cb *cb)
+/*
+ * sq-depth worth of fastreg + 0B read-inv pairs, reposting them as the reads
+ * complete.
+ * NOTE: every 9 seconds we sleep for 1 second to keep the kernel happy.
+ */
+static void krping_fr_test5(struct krping_cb *cb)
{
- struct ib_recv_wr *bad_wr;
+ struct ib_fast_reg_page_list **pl;
+ struct ib_send_wr *fr, *read, *bad;
+ struct ib_wc wc;
+ struct ib_sge *sgl;
+ u8 key = 0;
+ struct ib_mr **mr;
+ u8 **buf;
+ dma_addr_t *dma_addr;
+ int i;
int ret;
+ int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+ time_t start;
+ int count = 0;
+ int scnt;
+ int depth = cb->txdepth >> 1;
- ret = krping_bind_server(cb);
- if (ret)
+ if (!depth) {
+ PRINTF(cb, "txdepth must be > 1 for this test!\n");
return;
-
- ret = krping_setup_qp(cb, cb->child_cm_id);
- if (ret) {
- PRINTF(cb, "setup_qp failed: %d\n", ret);
- goto err0;
}
- ret = krping_setup_buffers(cb);
- if (ret) {
- PRINTF(cb, "krping_setup_buffers failed: %d\n", ret);
+ pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+ mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+ fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+ sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+ read = kzalloc(sizeof *read * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s read %p size %lu\n", __func__, read, sizeof *read * depth);
+ buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+ dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+ if (!pl || !mr || !fr || !read || !sgl || !buf || !dma_addr) {
+ PRINTF(cb, "kzalloc failed\n");
goto err1;
}
- ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
- if (ret) {
- PRINTF(cb, "ib_post_recv failed: %d\n", ret);
- goto err2;
- }
-
- ret = krping_accept(cb);
- if (ret) {
- PRINTF(cb, "connect error %d\n", ret);
- goto err2;
- }
-
- if (cb->wlat)
- krping_wlat_test_server(cb);
- else if (cb->rlat)
- krping_rlat_test_server(cb);
- else if (cb->bw)
- krping_bw_test_server(cb);
- else
- krping_test_server(cb);
- rdma_disconnect(cb->child_cm_id);
-err2:
- krping_free_buffers(cb);
-err1:
- krping_free_qp(cb);
-err0:
- rdma_destroy_id(cb->child_cm_id);
-}
-
-static void krping_test_client(struct krping_cb *cb)
-{
- int ping, start, cc, i, ret;
- struct ib_send_wr *bad_wr;
- unsigned char c;
-
- start = 65;
- for (ping = 0; !cb->count || ping < cb->count; ping++) {
- cb->state = RDMA_READ_ADV;
-
- /* Put some ascii text in the buffer. */
- cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
- for (i = cc, c = start; i < cb->size; i++) {
- cb->start_buf[i] = c;
- c++;
- if (c > 122)
- c = 65;
+ for (scnt = 0; scnt < depth; scnt++) {
+ pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+ if (IS_ERR(pl[scnt])) {
+ PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+ PTR_ERR(pl[scnt]));
+ goto err2;
}
- start++;
- if (start > 122)
- start = 65;
- cb->start_buf[cb->size - 1] = 0;
+ DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
- krping_format_send(cb, cb->start_dma_addr);
- if (cb->state == ERROR) {
- PRINTF(cb, "krping_format_send failed\n");
- break;
- }
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
- if (ret) {
- PRINTF(cb, "post send error %d\n", ret);
- break;
+ mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+ if (IS_ERR(mr[scnt])) {
+ PRINTF(cb, "alloc_fr failed %ld\n",
+ PTR_ERR(mr[scnt]));
+ goto err2;
}
+ DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+ ib_update_fast_reg_key(mr[scnt], ++key);
- /* Wait for server to ACK */
- wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
- if (cb->state != RDMA_WRITE_ADV) {
- PRINTF(cb,
- "wait for RDMA_WRITE_ADV state %d\n",
- cb->state);
- break;
+ buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+ if (!buf[scnt]) {
+ PRINTF(cb, "kmalloc failed\n");
+ ret = -ENOMEM;
+ goto err2;
}
-
- krping_format_send(cb, cb->rdma_dma_addr);
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+ DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+ dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+ buf[scnt], cb->size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(cb->pd->device->dma_device,
+ dma_addr[scnt])) {
+ PRINTF(cb, "dma_map failed\n");
+ ret = -ENOMEM;
+ goto err2;
+ }
+ DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+ for (i=0; i<plen; i++) {
+ pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+ DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+ __func__, scnt, i, pl[scnt]->page_list[i]);
+ }
+
+ sgl[scnt].lkey = mr[scnt]->rkey;
+ sgl[scnt].length = cb->size;
+ sgl[scnt].addr = (u64)buf[scnt];
+ DEBUG_LOG(cb, "%s sgl[%u].lkey 0x%x length %u addr 0x%llx\n",
+ __func__, scnt, sgl[scnt].lkey, sgl[scnt].length,
+ sgl[scnt].addr);
+
+ fr[scnt].opcode = IB_WR_FAST_REG_MR;
+ fr[scnt].wr_id = scnt;
+ fr[scnt].send_flags = 0;
+ fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+ fr[scnt].wr.fast_reg.length = cb->size;
+ fr[scnt].wr.fast_reg.page_list = pl[scnt];
+ fr[scnt].wr.fast_reg.page_list_len = plen;
+ fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+ fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+ fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+ fr[scnt].next = &read[scnt];
+ read[scnt].opcode = IB_WR_RDMA_READ_WITH_INV;
+ read[scnt].wr_id = scnt;
+ read[scnt].send_flags = IB_SEND_SIGNALED;
+ read[scnt].wr.rdma.rkey = cb->remote_rkey;
+ read[scnt].wr.rdma.remote_addr = cb->remote_addr;
+ read[scnt].num_sge = 1;
+ read[scnt].sg_list = &sgl[scnt];
+ ret = ib_post_send(cb->qp, &fr[scnt], &bad);
if (ret) {
- PRINTF(cb, "post send error %d\n", ret);
- break;
+ PRINTF(cb, "ib_post_send failed %d\n", ret);
+ goto err2;
}
+ }
- /* Wait for the server to say the RDMA Write is complete. */
- wait_event_interruptible(cb->sem,
- cb->state >= RDMA_WRITE_COMPLETE);
- if (cb->state != RDMA_WRITE_COMPLETE) {
- PRINTF(cb,
- "wait for RDMA_WRITE_COMPLETE state %d\n",
- cb->state);
+ start = time_uptime;
+ DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+ while (!cb->count || cb->server || count < cb->count) {
+ if ((time_uptime - start) >= 9) {
+ DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+ count);
+ wait_event_interruptible_timeout(cb->sem,
+ cb->state == ERROR,
+ 1);
+ if (cb->state == ERROR)
+ break;
+ start = time_uptime;
+ }
+ do {
+ ret = ib_poll_cq(cb->cq, 1, &wc);
+ if (ret < 0) {
+ PRINTF(cb, "ib_poll_cq failed %d\n",
+ ret);
+ goto err2;
+ }
+ if (ret == 1) {
+ if (wc.status) {
+ PRINTF(cb,
+ "completion error %u wr_id %lld "
+ "opcode %d\n", wc.status,
+ wc.wr_id, wc.opcode);
+ goto err2;
+ }
+ count++;
+ if (count == cb->count)
+ break;
+ ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+ fr[wc.wr_id].wr.fast_reg.rkey =
+ mr[wc.wr_id]->rkey;
+ sgl[wc.wr_id].lkey = mr[wc.wr_id]->rkey;
+ ret = ib_post_send(cb->qp, &fr[wc.wr_id], &bad);
+ if (ret) {
+ PRINTF(cb,
+ "ib_post_send failed %d\n", ret);
+ goto err2;
+ }
+ } else if (krping_sigpending()) {
+ PRINTF(cb, "signal!\n");
+ goto err2;
+ }
+ } while (ret == 1);
+ }
+ DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+ DEBUG_LOG(cb, "sleeping 1 second\n");
+ wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+ DEBUG_LOG(cb, "draining the cq...\n");
+ do {
+ ret = ib_poll_cq(cb->cq, 1, &wc);
+ if (ret < 0) {
+ PRINTF(cb, "ib_poll_cq failed %d\n", ret);
break;
}
-
- if (cb->validate)
- if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
- PRINTF(cb, "data mismatch!\n");
- break;
+ if (ret == 1) {
+ if (wc.status) {
+ PRINTF(cb, "completion error %u "
+ "opcode %u\n", wc.status, wc.opcode);
}
+ }
+ } while (ret == 1);
- if (cb->verbose) {
- if (strlen(cb->rdma_buf) > 128) {
- char msgbuf[128];
-
- strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
- PRINTF(cb, "ping data stripped: %s\n",
- msgbuf);
- } else
- PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+ DEBUG_LOG(cb, "destroying fr mrs!\n");
+ for (scnt = 0; scnt < depth; scnt++) {
+ if (mr[scnt]) {
+ ib_dereg_mr(mr[scnt]);
+ DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+ }
+ }
+ DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+ for (scnt = 0; scnt < depth; scnt++) {
+ if (buf[scnt]) {
+ dma_unmap_single(cb->pd->device->dma_device,
+ dma_addr[scnt], cb->size,
+ DMA_BIDIRECTIONAL);
+ kfree(buf[scnt]);
+ DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+ }
+ }
+ DEBUG_LOG(cb, "destroying fr page lists!\n");
+ for (scnt = 0; scnt < depth; scnt++) {
+ if (pl[scnt]) {
+ DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+ ib_free_fast_reg_page_list(pl[scnt]);
}
-#ifdef SLOW_KRPING
- wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
-#endif
}
+err1:
+ if (pl)
+ kfree(pl);
+ if (mr)
+ kfree(mr);
+ if (fr)
+ kfree(fr);
+ if (read)
+ kfree(read);
+ if (sgl)
+ kfree(sgl);
+ if (buf)
+ kfree(buf);
+ if (dma_addr)
+ kfree(dma_addr);
+}
+static void krping_fr_test_server(struct krping_cb *cb)
+{
+ DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static void krping_rlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_server(struct krping_cb *cb)
{
struct ib_send_wr *bad_wr;
struct ib_wc wc;
int ret;
- cb->state = RDMA_READ_ADV;
+ /* Spin waiting for client's Start STAG/TO/Len */
+ while (cb->state < RDMA_READ_ADV) {
+ krping_cq_event_handler(cb->cq, cb);
+ }
+ DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+ cb->remote_rkey, cb->remote_addr);
/* Send STAG/TO/Len to client */
krping_format_send(cb, cb->start_dma_addr);
- if (cb->state == ERROR) {
- PRINTF(cb, "krping_format_send failed\n");
- return;
- }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
PRINTF(cb, "post send error %d\n", ret);
@@ -1775,84 +1895,31 @@ static void krping_rlat_test_client(stru
return;
}
if (wc.status) {
- PRINTF(cb, "send completion error %d\n", wc.status);
+ PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
- /* Spin waiting for server's Start STAG/TO/Len */
- while (cb->state < RDMA_WRITE_ADV) {
- krping_cq_event_handler(cb->cq, cb);
- }
-
-#if 0
-{
- int i;
- struct timeval start, stop;
- time_t sec;
- suseconds_t usec;
- unsigned long long elapsed;
- struct ib_wc wc;
- struct ib_send_wr *bad_wr;
- int ne;
-
- cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
- cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
- cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
- cb->rdma_sq_wr.sg_list->length = 0;
- cb->rdma_sq_wr.num_sge = 0;
-
- microtime(&start);
- for (i=0; i < 100000; i++) {
- if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
- PRINTF(cb, "Couldn't post send\n");
- return;
- }
- do {
- ne = ib_poll_cq(cb->cq, 1, &wc);
- } while (ne == 0);
- if (ne < 0) {
- PRINTF(cb, "poll CQ failed %d\n", ne);
- return;
- }
- if (wc.status != IB_WC_SUCCESS) {
- PRINTF(cb, "Completion wth error at %s:\n",
- cb->server ? "server" : "client");
- PRINTF(cb, "Failed status %d: wr_id %d\n",
- wc.status, (int) wc.wr_id);
- return;
- }
- }
- microtime(&stop);
-
- if (stop.tv_usec < start.tv_usec) {
- stop.tv_usec += 1000000;
- stop.tv_sec -= 1;
- }
- sec = stop.tv_sec - start.tv_sec;
- usec = stop.tv_usec - start.tv_usec;
- elapsed = sec * 1000000 + usec;
- PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
-}
-#endif
-
- rlat_test(cb);
+ if (cb->duplex)
+ krping_fr_test5(cb);
+ DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static void krping_wlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_client(struct krping_cb *cb)
{
- struct ib_send_wr *bad_wr;
+ struct ib_send_wr *bad;
struct ib_wc wc;
int ret;
cb->state = RDMA_READ_ADV;
- /* Send STAG/TO/Len to client */
+ /* Send STAG/TO/Len to server */
krping_format_send(cb, cb->start_dma_addr);
if (cb->state == ERROR) {
PRINTF(cb, "krping_format_send failed\n");
return;
}
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+ ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
if (ret) {
PRINTF(cb, "post send error %d\n", ret);
return;
@@ -1873,15 +1940,619 @@ static void krping_wlat_test_client(stru
while (cb->state < RDMA_WRITE_ADV) {
krping_cq_event_handler(cb->cq, cb);
}
+ DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
- wlat_test(cb);
+ return krping_fr_test5(cb);
}
-static void krping_bw_test_client(struct krping_cb *cb)
+/*
+ * sq-depth worth of write + fastreg + inv, reposting them as the invs
+ * complete.
+ * NOTE: after every 9 seconds of running we yield for one tick (a timed
+ * wait on cb->sem) to keep the kernel happy.
+ * If a count is given, then the last IO will have a bogus lkey in the
+ * write work request.  This reproduces a fw bug where the connection
+ * will get stuck if a fastreg is processed while the ulptx is failing
+ * the bad write.
+ *
+ * Each of the (txdepth / 3) slots owns a fast-reg page list, a fast-reg
+ * MR and a DMA-mapped buffer of cb->size bytes, and is posted as a
+ * three-WR chain: write -> fastreg -> local-inv.  Only the inv is
+ * signaled, so its wr_id identifies the slot to repost.
+ * Everything allocated here is released before returning; errors are
+ * reported through PRINTF only, the function returns nothing.
+ */
+static void krping_fr_test6(struct krping_cb *cb)
{
- struct ib_send_wr *bad_wr;
+	struct ib_fast_reg_page_list **pl;
+	struct ib_send_wr *fr, *write, *inv, *bad;
struct ib_wc wc;
- int ret;
+	struct ib_sge *sgl;
+	u8 key = 0;
+	struct ib_mr **mr;
+	u8 **buf;
+	dma_addr_t *dma_addr;
+	int i;
+	int ret;
+	int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	unsigned long start;
+	int count = 0;
+	int scnt;
+	int depth = cb->txdepth / 3;
+
+	if (!depth) {
+		PRINTF(cb, "txdepth must be > 3 for this test!\n");
+		return;
+	}
+
+	pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+
+	mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+
+	fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+
+	sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+
+	write = kzalloc(sizeof *write * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s write %p size %lu\n", __func__, write, sizeof *write * depth);
+
+	inv = kzalloc(sizeof *inv * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s inv %p size %lu\n", __func__, inv, sizeof *inv * depth);
+
+	buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+
+	dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+
+	/* inv is dereferenced in the setup loop, so it must be checked too. */
+	if (!pl || !mr || !fr || !write || !inv || !sgl || !buf || !dma_addr) {
+		PRINTF(cb, "kzalloc failed\n");
+		goto err1;
+	}
+
+	for (scnt = 0; scnt < depth; scnt++) {
+		pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+		if (IS_ERR(pl[scnt])) {
+			PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+			       PTR_ERR(pl[scnt]));
+			/* Don't let cleanup free an ERR_PTR. */
+			pl[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
+
+		mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+		if (IS_ERR(mr[scnt])) {
+			PRINTF(cb, "alloc_fr failed %ld\n",
+			       PTR_ERR(mr[scnt]));
+			/* Don't let cleanup deregister an ERR_PTR. */
+			mr[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+		ib_update_fast_reg_key(mr[scnt], ++key);
+
+		buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+		if (!buf[scnt]) {
+			PRINTF(cb, "kmalloc failed\n");
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+		dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+						buf[scnt], cb->size,
+						DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(cb->pd->device->dma_device,
+				      dma_addr[scnt])) {
+			PRINTF(cb, "dma_map failed\n");
+			/*
+			 * Release the buffer here: cleanup unmaps every
+			 * non-NULL buf[], and this one was never mapped.
+			 */
+			kfree(buf[scnt]);
+			buf[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+		for (i = 0; i < plen; i++) {
+			pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+			DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+				  __func__, scnt, i, pl[scnt]->page_list[i]);
+		}
+
+		write[scnt].opcode = IB_WR_RDMA_WRITE;
+		write[scnt].wr_id = scnt;
+		write[scnt].wr.rdma.rkey = cb->remote_rkey;
+		write[scnt].wr.rdma.remote_addr = cb->remote_addr;
+		write[scnt].num_sge = 1;
+		write[scnt].sg_list = &cb->rdma_sgl;
+		write[scnt].sg_list->length = cb->size;
+		write[scnt].next = &fr[scnt];
+
+		fr[scnt].opcode = IB_WR_FAST_REG_MR;
+		fr[scnt].wr_id = scnt;
+		fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+		fr[scnt].wr.fast_reg.length = cb->size;
+		fr[scnt].wr.fast_reg.page_list = pl[scnt];
+		fr[scnt].wr.fast_reg.page_list_len = plen;
+		fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+		fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+		fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+		fr[scnt].next = &inv[scnt];
+
+		inv[scnt].opcode = IB_WR_LOCAL_INV;
+		/*
+		 * The inv is the only signaled WR of the chain, so its wr_id
+		 * is what the completion loop uses to pick the slot to repost.
+		 */
+		inv[scnt].wr_id = scnt;
+		inv[scnt].send_flags = IB_SEND_SIGNALED;
+		inv[scnt].ex.invalidate_rkey = mr[scnt]->rkey;
+
+		ret = ib_post_send(cb->qp, &write[scnt], &bad);
+		if (ret) {
+			PRINTF(cb, "ib_post_send failed %d\n", ret);
+			goto err2;
+		}
+	}
+
+	start = time_uptime;
+	DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+	while (!cb->count || cb->server || count < cb->count) {
+		if ((time_uptime - start) >= 9) {
+			DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+				  count);
+			wait_event_interruptible_timeout(cb->sem,
+							 cb->state == ERROR,
+							 1);
+			if (cb->state == ERROR)
+				break;
+			start = time_uptime;
+		}
+		do {
+			ret = ib_poll_cq(cb->cq, 1, &wc);
+			if (ret < 0) {
+				PRINTF(cb, "ib_poll_cq failed %d\n",
+				       ret);
+				goto err2;
+			}
+			if (ret == 1) {
+				if (wc.status) {
+					PRINTF(cb,
+					       "completion error %u wr_id %lld "
+					       "opcode %d\n", wc.status,
+					       wc.wr_id, wc.opcode);
+					goto err2;
+				}
+				count++;
+				/* Poison the lkey so the final write fails. */
+				if (count == (cb->count -1))
+					cb->rdma_sgl.lkey = 0x00dead;
+				if (count == cb->count)
+					break;
+				/* Fresh key for the slot, then repost its chain. */
+				ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+				fr[wc.wr_id].wr.fast_reg.rkey =
+					mr[wc.wr_id]->rkey;
+				inv[wc.wr_id].ex.invalidate_rkey =
+					mr[wc.wr_id]->rkey;
+				ret = ib_post_send(cb->qp, &write[wc.wr_id], &bad);
+				if (ret) {
+					PRINTF(cb,
+					       "ib_post_send failed %d\n", ret);
+					goto err2;
+				}
+			} else if (krping_sigpending()){
+				PRINTF(cb, "signal!\n");
+				goto err2;
+			}
+		} while (ret == 1);
+	}
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			if (wc.status) {
+				PRINTF(cb, "completion error %u "
+				       "opcode %u\n", wc.status, wc.opcode);
+			}
+		}
+	} while (ret == 1);
+
+	/* Slots that were never set up are still NULL from kzalloc. */
+	DEBUG_LOG(cb, "destroying fr mrs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (mr[scnt]) {
+			ib_dereg_mr(mr[scnt]);
+			DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (buf[scnt]) {
+			dma_unmap_single(cb->pd->device->dma_device,
+					 dma_addr[scnt], cb->size,
+					 DMA_BIDIRECTIONAL);
+			kfree(buf[scnt]);
+			DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "destroying fr page lists!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (pl[scnt]) {
+			DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+			ib_free_fast_reg_page_list(pl[scnt]);
+		}
+	}
+err1:
+	if (pl)
+		kfree(pl);
+	if (mr)
+		kfree(mr);
+	if (fr)
+		kfree(fr);
+	if (write)
+		kfree(write);
+	if (inv)
+		kfree(inv);
+	if (sgl)
+		kfree(sgl);
+	if (buf)
+		kfree(buf);
+	if (dma_addr)
+		kfree(dma_addr);
+}
+
+/*
+ * Server side of fastreg test 6.
+ *
+ * Polls the CQ until cb->state reaches RDMA_READ_ADV, sends this side's
+ * own buffer advertisement back, and waits for that send to complete.
+ * When cb->duplex is set it then drives krping_fr_test6() from this side
+ * as well.  Finally it sleeps until the connection enters the ERROR
+ * state.
+ */
+static void krping_fr_test6_server(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	/* Spin waiting for client's Start STAG/TO/Len */
+	while (cb->state < RDMA_READ_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+		  cb->remote_rkey, cb->remote_addr);
+
+	/* Send STAG/TO/Len to client */
+	krping_format_send(cb, cb->start_dma_addr);
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment is parenthesized
+	 * so that ret holds ib_poll_cq()'s return value, not the result of
+	 * the comparison; otherwise the error check below could never fire
+	 * and wc would be examined uninitialized after a poll failure.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completiong error %d\n", wc.status);
+		return;
+	}
+
+	if (cb->duplex)
+		krping_fr_test6(cb);
+	DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+	wait_event_interruptible(cb->sem, cb->state == ERROR);
+}
+
+/*
+ * Client side of fastreg test 6.
+ *
+ * Sets cb->state to RDMA_READ_ADV, sends this side's buffer
+ * advertisement to the server, waits for that send to complete, then
+ * polls the CQ until cb->state reaches RDMA_WRITE_ADV.  After that it
+ * runs krping_fr_test6().
+ */
+static void krping_fr_test6_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment is parenthesized
+	 * so that ret holds ib_poll_cq()'s return value, not the result of
+	 * the comparison; otherwise the error check below could never fire
+	 * and wc would be examined uninitialized after a poll failure.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
+
+	/* A void function must not return an expression; just call through. */
+	krping_fr_test6(cb);
+}
+
+static void krping_run_server(struct krping_cb *cb)
+{
+ struct ib_recv_wr *bad_wr;
+ int ret;
+
+ ret = krping_bind_server(cb);
+ if (ret)
+ return;
+
+ ret = krping_setup_qp(cb, cb->child_cm_id);
+ if (ret) {
+ PRINTF(cb, "setup_qp failed: %d\n", ret);
+ goto err0;
+ }
+
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list