svn commit: r306661 - in stable/11/sys/dev/cxgbe: . tom
John Baldwin <jhb at FreeBSD.org>
Mon Oct 3 23:15:45 UTC 2016
Author: jhb
Date: Mon Oct 3 23:15:44 2016
New Revision: 306661
URL: https://svnweb.freebsd.org/changeset/base/306661
Log:
MFC 303405: Add support for zero-copy aio_write() on TOE sockets.
AIO write requests for a TOE socket on a Chelsio T4+ adapter can now
DMA directly from the user-supplied buffer. This is implemented by
wiring the pages backing the user-supplied buffer and queueing special
mbufs backed by raw VM pages to the socket buffer. The TOE code
recognizes these special mbufs and builds a sglist from the VM page
array associated with the mbuf when queueing a work request to the TOE.
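
For illustration only (not part of this change): the mechanism condenses to wiring the user pages and describing them to the DMA engine through a scatter/gather list. hold_aio() and write_tx_sgl() in the diff below are the real code; this is a minimal sketch using the same kernel interfaces.

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/sglist.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>

/* Sketch: wire the pages backing [uaddr, uaddr + len) and describe
 * them for DMA.  No kernel virtual mapping is ever created. */
static int
wire_user_buf(vm_map_t map, vm_offset_t uaddr, size_t len,
    vm_page_t *pages, int maxpages, struct sglist *sg)
{
	int n;

	/* Fault in and hold every page spanning the user buffer. */
	n = vm_fault_quick_hold_pages(map, trunc_page(uaddr),
	    round_page(uaddr + len) - trunc_page(uaddr),
	    VM_PROT_WRITE, pages, maxpages);
	if (n < 0)
		return (EFAULT);

	/* Append the held pages, starting at the in-page offset. */
	return (sglist_append_vmpages(sg, pages, uaddr & PAGE_MASK, len));
}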
Because these mbufs do not have an associated virtual address, m_data
is not valid. Thus, the AIO handler does not invoke sosend() directly
for these mbufs but instead inlines portions of sosend_generic() and
tcp_usr_send().
An aiotx_buffer structure is used to describe the user buffer (e.g.
it holds the array of VM pages and a reference to the AIO job). The
special mbufs reference this structure via m_ext. Note that a single
job might be split across multiple mbufs (e.g. if it is larger than
the socket buffer size). The 'ext_arg2' member of each mbuf gives an
offset relative to the backing aiotx_buffer. The AIO job associated
with an aiotx_buffer structure is completed when the last reference to
the structure is released.
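
Schematically (a sketch, not the driver code; free_aiotx_buffer() in the diff below is authoritative, error and cancellation handling are omitted, and struct aiotx_buffer, M_CXGBE, and the aio_sent alias for backend3 all come from this patch):

#include <sys/param.h>
#include <sys/aio.h>
#include <sys/malloc.h>
#include <sys/refcount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

/* Sketch: drop one reference; the last release completes the job.
 * The job holds one reference while it still has data to queue and
 * every queued mbuf holds one via its m_ext free routine. */
static void
aiotx_release_sketch(struct aiotx_buffer *ab)
{
	struct kaiocb *job = ab->job;

	if (!refcount_release(&ab->refcount))
		return;			/* mbufs still in flight */
	/* Last reference: unwire the user pages and finish the job. */
	vm_page_unhold_pages(ab->ps.pages, ab->ps.npages);
	aio_complete(job, job->aio_sent, 0);	/* bytes sent so far */
	free(ab, M_CXGBE);
}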
Zero-copy aio_write() requests for connections associated with a
given adapter can be enabled or disabled at runtime via the
'dev.t[45]nex.N.toe.tx_zcopy' sysctl.
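
From userland nothing changes beyond this knob; as a sketch (the
t5nex instance number is an assumption for this example, and setting
the sysctl requires privilege):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <aio.h>
#include <string.h>

/* Enable zero-copy TX on one adapter, then queue an ordinary POSIX
 * AIO write on a connected TOE socket. */
static int
zcopy_aio_write(int sock, void *buf, size_t len, struct aiocb *cb)
{
	int one = 1;

	/* "dev.t5nex.0.toe.tx_zcopy" names the first T5 adapter here;
	 * adjust for the hardware at hand. */
	if (sysctlbyname("dev.t5nex.0.toe.tx_zcopy", NULL, NULL,
	    &one, sizeof(one)) == -1)
		return (-1);

	memset(cb, 0, sizeof(*cb));
	cb->aio_fildes = sock;
	cb->aio_buf = buf;
	cb->aio_nbytes = len;
	return (aio_write(cb));	/* reap with aio_error()/aio_return() */
}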
Sponsored by: Chelsio Communications
Modified:
stable/11/sys/dev/cxgbe/offload.h
stable/11/sys/dev/cxgbe/t4_main.c
stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c
stable/11/sys/dev/cxgbe/tom/t4_tom.c
stable/11/sys/dev/cxgbe/tom/t4_tom.h
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/dev/cxgbe/offload.h
==============================================================================
--- stable/11/sys/dev/cxgbe/offload.h Mon Oct 3 22:42:23 2016 (r306660)
+++ stable/11/sys/dev/cxgbe/offload.h Mon Oct 3 23:15:44 2016 (r306661)
@@ -147,6 +147,7 @@ struct tom_tunables {
int ddp;
int rx_coalesce;
int tx_align;
+ int tx_zcopy;
};
#ifdef TCP_OFFLOAD
Modified: stable/11/sys/dev/cxgbe/t4_main.c
==============================================================================
--- stable/11/sys/dev/cxgbe/t4_main.c Mon Oct 3 22:42:23 2016 (r306660)
+++ stable/11/sys/dev/cxgbe/t4_main.c Mon Oct 3 23:15:44 2016 (r306661)
@@ -4864,6 +4864,11 @@ t4_sysctls(struct adapter *sc)
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
+ sc->tt.tx_zcopy = 0;
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
+ CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
+ "Enable zero-copy aio_write(2)");
+
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
"TP timer tick (us)");
Modified: stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c Mon Oct 3 22:42:23 2016 (r306660)
+++ stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c Mon Oct 3 23:15:44 2016 (r306661)
@@ -32,15 +32,18 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <sys/param.h>
-#include <sys/types.h>
+#include <sys/aio.h>
+#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
+#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sglist.h>
+#include <sys/taskqueue.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
@@ -51,6 +54,14 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>
+#include <security/mac/mac_framework.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
@@ -71,6 +82,34 @@ VNET_DECLARE(int, tcp_autorcvbuf_inc);
VNET_DECLARE(int, tcp_autorcvbuf_max);
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
+#define IS_AIOTX_MBUF(m) \
+ ((m)->m_flags & M_EXT && (m)->m_ext.ext_flags & EXT_FLAG_AIOTX)
+
+static void t4_aiotx_cancel(struct kaiocb *job);
+static void t4_aiotx_queue_toep(struct toepcb *toep);
+
+static size_t
+aiotx_mbuf_pgoff(struct mbuf *m)
+{
+ struct aiotx_buffer *ab;
+
+ MPASS(IS_AIOTX_MBUF(m));
+ ab = m->m_ext.ext_arg1;
+ return ((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE);
+}
+
+static vm_page_t *
+aiotx_mbuf_pages(struct mbuf *m)
+{
+ struct aiotx_buffer *ab;
+ int npages;
+
+ MPASS(IS_AIOTX_MBUF(m));
+ ab = m->m_ext.ext_arg1;
+ npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE;
+ return (ab->ps.pages + npages);
+}
+
void
send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
{
@@ -519,7 +558,11 @@ write_tx_sgl(void *dst, struct mbuf *sta
i = -1;
for (m = start; m != stop; m = m->m_next) {
- rc = sglist_append(&sg, mtod(m, void *), m->m_len);
+ if (IS_AIOTX_MBUF(m))
+ rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m),
+ aiotx_mbuf_pgoff(m), m->m_len);
+ else
+ rc = sglist_append(&sg, mtod(m, void *), m->m_len);
if (__predict_false(rc != 0))
panic("%s: sglist_append %d", __func__, rc);
@@ -579,6 +622,7 @@ t4_push_frames(struct adapter *sc, struc
struct sockbuf *sb = &so->so_snd;
int tx_credits, shove, compl, sowwakeup;
struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
+ bool aiotx_mbuf_seen;
INP_WLOCK_ASSERT(inp);
KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
@@ -589,6 +633,10 @@ t4_push_frames(struct adapter *sc, struc
toep->ulp_mode == ULP_MODE_RDMA,
("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));
+#ifdef VERBOSE_TRACES
+ CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
+ __func__, toep->tid, toep->flags, tp->t_flags, drop);
+#endif
if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
return;
@@ -618,8 +666,15 @@ t4_push_frames(struct adapter *sc, struc
plen = 0;
nsegs = 0;
max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
+ aiotx_mbuf_seen = false;
for (m = sndptr; m != NULL; m = m->m_next) {
- int n = sglist_count(mtod(m, void *), m->m_len);
+ int n;
+
+ if (IS_AIOTX_MBUF(m))
+ n = sglist_count_vmpages(aiotx_mbuf_pages(m),
+ aiotx_mbuf_pgoff(m), m->m_len);
+ else
+ n = sglist_count(mtod(m, void *), m->m_len);
nsegs += n;
plen += m->m_len;
@@ -631,9 +686,13 @@ t4_push_frames(struct adapter *sc, struc
if (plen == 0) {
/* Too few credits */
toep->flags |= TPF_TX_SUSPENDED;
- if (sowwakeup)
+ if (sowwakeup) {
+ if (!TAILQ_EMPTY(
+ &toep->aiotx_jobq))
+ t4_aiotx_queue_toep(
+ toep);
sowwakeup_locked(so);
- else
+ } else
SOCKBUF_UNLOCK(sb);
SOCKBUF_UNLOCK_ASSERT(sb);
return;
@@ -641,6 +700,8 @@ t4_push_frames(struct adapter *sc, struc
break;
}
+ if (IS_AIOTX_MBUF(m))
+ aiotx_mbuf_seen = true;
if (max_nsegs_1mbuf < n)
max_nsegs_1mbuf = n;
sb_sndptr = m; /* new sb->sb_sndptr if all goes well */
@@ -670,9 +731,11 @@ t4_push_frames(struct adapter *sc, struc
else
sowwakeup = 1; /* room available */
}
- if (sowwakeup)
+ if (sowwakeup) {
+ if (!TAILQ_EMPTY(&toep->aiotx_jobq))
+ t4_aiotx_queue_toep(toep);
sowwakeup_locked(so);
- else
+ } else
SOCKBUF_UNLOCK(sb);
SOCKBUF_UNLOCK_ASSERT(sb);
@@ -687,7 +750,7 @@ t4_push_frames(struct adapter *sc, struc
panic("%s: excess tx.", __func__);
shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
- if (plen <= max_imm) {
+ if (plen <= max_imm && !aiotx_mbuf_seen) {
/* Immediate data tx */
@@ -1616,6 +1679,9 @@ do_fw4_ack(struct sge_iq *iq, const stru
}
}
+#ifdef VERBOSE_TRACES
+ CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits);
+#endif
so = inp->inp_socket;
txsd = &toep->txsd[toep->txsd_cidx];
plen = 0;
@@ -1642,6 +1708,10 @@ do_fw4_ack(struct sge_iq *iq, const stru
if (toep->flags & TPF_TX_SUSPENDED &&
toep->tx_credits >= toep->tx_total / 4) {
+#ifdef VERBOSE_TRACES
+ CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__,
+ tid);
+#endif
toep->flags &= ~TPF_TX_SUSPENDED;
if (toep->ulp_mode == ULP_MODE_ISCSI)
t4_push_pdus(sc, toep, plen);
@@ -1668,7 +1738,13 @@ do_fw4_ack(struct sge_iq *iq, const stru
sowwakeup_locked(so); /* unlocks so_snd */
rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
} else {
+#ifdef VERBOSE_TRACES
+ CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__,
+ tid, plen);
+#endif
sbdrop_locked(sb, plen);
+ if (!TAILQ_EMPTY(&toep->aiotx_jobq))
+ t4_aiotx_queue_toep(toep);
sowwakeup_locked(so); /* unlocks so_snd */
}
SOCKBUF_UNLOCK_ASSERT(sb);
@@ -1768,4 +1844,397 @@ t4_uninit_cpl_io_handlers(void)
t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack);
}
+
+/*
+ * Use the 'backend3' field in AIO jobs to store the amount of data
+ * sent by the AIO job so far and the 'backend4' field to hold an
+ * error that should be reported when the job is completed.
+ */
+#define aio_sent backend3
+#define aio_error backend4
+
+#define jobtotid(job) \
+ (((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)
+
+static void
+free_aiotx_buffer(struct aiotx_buffer *ab)
+{
+ struct kaiocb *job;
+ long status;
+ int error;
+
+ if (refcount_release(&ab->refcount) == 0)
+ return;
+
+ job = ab->job;
+ error = job->aio_error;
+ status = job->aio_sent;
+ vm_page_unhold_pages(ab->ps.pages, ab->ps.npages);
+ free(ab, M_CXGBE);
+#ifdef VERBOSE_TRACES
+ CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
+ jobtotid(job), job, status, error);
+#endif
+ if (error == ECANCELED && status != 0)
+ error = 0;
+ if (error == ECANCELED)
+ aio_cancel(job);
+ else if (error)
+ aio_complete(job, -1, error);
+ else
+ aio_complete(job, status, 0);
+}
+
+static void
+t4_aiotx_mbuf_free(struct mbuf *m, void *buffer, void *arg)
+{
+ struct aiotx_buffer *ab = buffer;
+
+#ifdef VERBOSE_TRACES
+ CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
+ m->m_len, jobtotid(ab->job));
+#endif
+ free_aiotx_buffer(ab);
+}
+
+/*
+ * Hold the buffer backing an AIO request and return an AIO transmit
+ * buffer.
+ */
+static int
+hold_aio(struct kaiocb *job)
+{
+ struct aiotx_buffer *ab;
+ struct vmspace *vm;
+ vm_map_t map;
+ vm_offset_t start, end, pgoff;
+ int n;
+
+ MPASS(job->backend1 == NULL);
+
+ /*
+ * The AIO subsystem will cancel and drain all requests before
+ * permitting a process to exit or exec, so p_vmspace should
+ * be stable here.
+ */
+ vm = job->userproc->p_vmspace;
+ map = &vm->vm_map;
+ start = (uintptr_t)job->uaiocb.aio_buf;
+ pgoff = start & PAGE_MASK;
+ end = round_page(start + job->uaiocb.aio_nbytes);
+ start = trunc_page(start);
+ n = atop(end - start);
+
+ ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK |
+ M_ZERO);
+ refcount_init(&ab->refcount, 1);
+ ab->ps.pages = (vm_page_t *)(ab + 1);
+ ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start,
+ VM_PROT_WRITE, ab->ps.pages, n);
+ if (ab->ps.npages < 0) {
+ free(ab, M_CXGBE);
+ return (EFAULT);
+ }
+
+ KASSERT(ab->ps.npages == n,
+ ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n));
+
+ ab->ps.offset = pgoff;
+ ab->ps.len = job->uaiocb.aio_nbytes;
+ ab->job = job;
+ job->backend1 = ab;
+#ifdef VERBOSE_TRACES
+ CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d",
+ __func__, jobtotid(job), &ab->ps, job, ab->ps.npages);
+#endif
+ return (0);
+}
+
+static void
+t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
+{
+ struct adapter *sc;
+ struct sockbuf *sb;
+ struct file *fp;
+ struct aiotx_buffer *ab;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ struct mbuf *m;
+ int error;
+ bool moretocome, sendmore;
+
+ sc = td_adapter(toep->td);
+ sb = &so->so_snd;
+ SOCKBUF_UNLOCK(sb);
+ fp = job->fd_file;
+ ab = job->backend1;
+ m = NULL;
+
+#ifdef MAC
+ error = mac_socket_check_send(fp->f_cred, so);
+ if (error != 0)
+ goto out;
+#endif
+
+ if (ab == NULL) {
+ error = hold_aio(job);
+ if (error != 0)
+ goto out;
+ ab = job->backend1;
+ }
+
+ /* Inline sosend_generic(). */
+
+ job->msgsnd = 1;
+
+ error = sblock(sb, SBL_WAIT);
+ MPASS(error == 0);
+
+sendanother:
+ m = m_get(M_WAITOK, MT_DATA);
+
+ SOCKBUF_LOCK(sb);
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+ SOCKBUF_UNLOCK(sb);
+ sbunlock(sb);
+ if ((so->so_options & SO_NOSIGPIPE) == 0) {
+ PROC_LOCK(job->userproc);
+ kern_psignal(job->userproc, SIGPIPE);
+ PROC_UNLOCK(job->userproc);
+ }
+ error = EPIPE;
+ goto out;
+ }
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ SOCKBUF_UNLOCK(sb);
+ sbunlock(sb);
+ goto out;
+ }
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ SOCKBUF_UNLOCK(sb);
+ sbunlock(sb);
+ error = ENOTCONN;
+ goto out;
+ }
+ if (sbspace(sb) < sb->sb_lowat) {
+ MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));
+
+ /*
+ * Don't block if there is too little room in the socket
+ * buffer. Instead, requeue the request.
+ */
+ if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
+ SOCKBUF_UNLOCK(sb);
+ sbunlock(sb);
+ error = ECANCELED;
+ goto out;
+ }
+ TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
+ SOCKBUF_UNLOCK(sb);
+ sbunlock(sb);
+ goto out;
+ }
+
+ /*
+ * Write as much data as the socket permits, but no more than
+ * a single sndbuf at a time.
+ */
+ m->m_len = sbspace(sb);
+ if (m->m_len > ab->ps.len - job->aio_sent) {
+ m->m_len = ab->ps.len - job->aio_sent;
+ moretocome = false;
+ } else
+ moretocome = true;
+ if (m->m_len > sc->tt.sndbuf) {
+ m->m_len = sc->tt.sndbuf;
+ sendmore = true;
+ } else
+ sendmore = false;
+
+ if (!TAILQ_EMPTY(&toep->aiotx_jobq))
+ moretocome = true;
+ SOCKBUF_UNLOCK(sb);
+ MPASS(m->m_len != 0);
+
+ /* Inlined tcp_usr_send(). */
+
+ inp = toep->inp;
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
+ sbunlock(sb);
+ error = ECONNRESET;
+ goto out;
+ }
+
+ refcount_acquire(&ab->refcount);
+ m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab,
+ (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV);
+ m->m_ext.ext_flags |= EXT_FLAG_AIOTX;
+ job->aio_sent += m->m_len;
+
+ sbappendstream(sb, m, 0);
+ m = NULL;
+
+ if (!(inp->inp_flags & INP_DROPPED)) {
+ tp = intotcpcb(inp);
+ if (moretocome)
+ tp->t_flags |= TF_MORETOCOME;
+ error = tp->t_fb->tfb_tcp_output(tp);
+ if (moretocome)
+ tp->t_flags &= ~TF_MORETOCOME;
+ }
+
+ INP_WUNLOCK(inp);
+ if (sendmore)
+ goto sendanother;
+ sbunlock(sb);
+
+ if (error)
+ goto out;
+
+ /*
+ * If this is a blocking socket and the request has not been
+ * fully completed, requeue it until the socket is ready
+ * again.
+ */
+ if (job->aio_sent < job->uaiocb.aio_nbytes &&
+ !(so->so_state & SS_NBIO)) {
+ SOCKBUF_LOCK(sb);
+ if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
+ SOCKBUF_UNLOCK(sb);
+ error = ECANCELED;
+ goto out;
+ }
+ TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
+ return;
+ }
+
+ /*
+ * If the request will not be requeued, drop a reference on
+ * the aiotx buffer. Any mbufs in flight should still
+ * contain a reference, but this drops the reference that the
+ * job owns while it is waiting to queue mbufs to the socket.
+ */
+ free_aiotx_buffer(ab);
+
+out:
+ if (error) {
+ if (ab != NULL) {
+ job->aio_error = error;
+ free_aiotx_buffer(ab);
+ } else {
+ MPASS(job->aio_sent == 0);
+ aio_complete(job, -1, error);
+ }
+ }
+ if (m != NULL)
+ m_free(m);
+ SOCKBUF_LOCK(sb);
+}
+
+static void
+t4_aiotx_task(void *context, int pending)
+{
+ struct toepcb *toep = context;
+ struct inpcb *inp = toep->inp;
+ struct socket *so = inp->inp_socket;
+ struct kaiocb *job;
+
+ CURVNET_SET(so->so_vnet);
+ SOCKBUF_LOCK(&so->so_snd);
+ while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
+ job = TAILQ_FIRST(&toep->aiotx_jobq);
+ TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
+ if (!aio_clear_cancel_function(job))
+ continue;
+
+ t4_aiotx_process_job(toep, so, job);
+ }
+ toep->aiotx_task_active = false;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ CURVNET_RESTORE();
+
+ free_toepcb(toep);
+}
+
+static void
+t4_aiotx_queue_toep(struct toepcb *toep)
+{
+
+ SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd);
+#ifdef VERBOSE_TRACES
+ CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s",
+ __func__, toep->tid, toep->aiotx_task_active ? "true" : "false");
+#endif
+ if (toep->aiotx_task_active)
+ return;
+ toep->aiotx_task_active = true;
+ hold_toepcb(toep);
+ soaio_enqueue(&toep->aiotx_task);
+}
+
+static void
+t4_aiotx_cancel(struct kaiocb *job)
+{
+ struct aiotx_buffer *ab;
+ struct socket *so;
+ struct sockbuf *sb;
+ struct tcpcb *tp;
+ struct toepcb *toep;
+
+ so = job->fd_file->f_data;
+ tp = so_sototcpcb(so);
+ toep = tp->t_toe;
+ MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE);
+ sb = &so->so_snd;
+
+ SOCKBUF_LOCK(sb);
+ if (!aio_cancel_cleared(job))
+ TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
+ SOCKBUF_UNLOCK(sb);
+
+ ab = job->backend1;
+ if (ab != NULL)
+ free_aiotx_buffer(ab);
+ else
+ aio_cancel(job);
+}
+
+int
+t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ struct adapter *sc = td_adapter(toep->td);
+
+ /* This only handles writes. */
+ if (job->uaiocb.aio_lio_opcode != LIO_WRITE)
+ return (EOPNOTSUPP);
+
+ if (!sc->tt.tx_zcopy)
+ return (EOPNOTSUPP);
+
+ SOCKBUF_LOCK(&so->so_snd);
+#ifdef VERBOSE_TRACES
+ CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job);
+#endif
+ if (!aio_set_cancel_function(job, t4_aiotx_cancel))
+ panic("new job was cancelled");
+ TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list);
+ if (sowriteable(so))
+ t4_aiotx_queue_toep(toep);
+ SOCKBUF_UNLOCK(&so->so_snd);
+ return (0);
+}
+
+void
+aiotx_init_toep(struct toepcb *toep)
+{
+
+ TAILQ_INIT(&toep->aiotx_jobq);
+ TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep);
+}
#endif
Modified: stable/11/sys/dev/cxgbe/tom/t4_tom.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_tom.c Mon Oct 3 22:42:23 2016 (r306660)
+++ stable/11/sys/dev/cxgbe/tom/t4_tom.c Mon Oct 3 23:15:44 2016 (r306661)
@@ -68,11 +68,11 @@ __FBSDID("$FreeBSD$");
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
-static struct protosw ddp_protosw;
-static struct pr_usrreqs ddp_usrreqs;
+static struct protosw toe_protosw;
+static struct pr_usrreqs toe_usrreqs;
-static struct protosw ddp6_protosw;
-static struct pr_usrreqs ddp6_usrreqs;
+static struct protosw toe6_protosw;
+static struct pr_usrreqs toe6_usrreqs;
/* Module ops */
static int t4_tom_mod_load(void);
@@ -167,6 +167,7 @@ alloc_toepcb(struct vi_info *vi, int txq
toep->txsd_avail = txsd_total;
toep->txsd_pidx = 0;
toep->txsd_cidx = 0;
+ aiotx_init_toep(toep);
ddp_init_toep(toep);
return (toep);
@@ -217,12 +218,10 @@ offload_socket(struct socket *so, struct
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOCOALESCE;
- if (toep->ulp_mode == ULP_MODE_TCPDDP) {
- if (inp->inp_vflag & INP_IPV6)
- so->so_proto = &ddp6_protosw;
- else
- so->so_proto = &ddp_protosw;
- }
+ if (inp->inp_vflag & INP_IPV6)
+ so->so_proto = &toe6_protosw;
+ else
+ so->so_proto = &toe_protosw;
SOCKBUF_UNLOCK(sb);
/* Update TCP PCB */
@@ -1120,6 +1119,22 @@ t4_tom_ifaddr_event(void *arg __unused,
}
static int
+t4_aio_queue_tom(struct socket *so, struct kaiocb *job)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ int error;
+
+ if (toep->ulp_mode == ULP_MODE_TCPDDP) {
+ error = t4_aio_queue_ddp(so, job);
+ if (error != EOPNOTSUPP)
+ return (error);
+ }
+
+ return (t4_aio_queue_aiotx(so, job));
+}
+
+static int
t4_tom_mod_load(void)
{
int rc;
@@ -1137,18 +1152,18 @@ t4_tom_mod_load(void)
tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
if (tcp_protosw == NULL)
return (ENOPROTOOPT);
- bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw));
- bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs));
- ddp_usrreqs.pru_aio_queue = t4_aio_queue_ddp;
- ddp_protosw.pr_usrreqs = &ddp_usrreqs;
+ bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw));
+ bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs));
+ toe_usrreqs.pru_aio_queue = t4_aio_queue_tom;
+ toe_protosw.pr_usrreqs = &toe_usrreqs;
tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
if (tcp6_protosw == NULL)
return (ENOPROTOOPT);
- bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw));
- bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs));
- ddp6_usrreqs.pru_aio_queue = t4_aio_queue_ddp;
- ddp6_protosw.pr_usrreqs = &ddp6_usrreqs;
+ bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
+ bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs));
+ toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom;
+ toe6_protosw.pr_usrreqs = &toe6_usrreqs;
TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL);
ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event,
Modified: stable/11/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_tom.h Mon Oct 3 22:42:23 2016 (r306660)
+++ stable/11/sys/dev/cxgbe/tom/t4_tom.h Mon Oct 3 23:15:44 2016 (r306661)
@@ -102,6 +102,8 @@ TAILQ_HEAD(pagesetq, pageset);
#define PS_WIRED 0x0001 /* Pages wired rather than held. */
#define PS_PPODS_WRITTEN 0x0002 /* Page pods written to the card. */
+#define EXT_FLAG_AIOTX EXT_FLAG_VENDOR1
+
struct ddp_buffer {
struct pageset *ps;
@@ -109,6 +111,12 @@ struct ddp_buffer {
int cancel_pending;
};
+struct aiotx_buffer {
+ struct pageset ps;
+ struct kaiocb *job;
+ int refcount;
+};
+
struct toepcb {
TAILQ_ENTRY(toepcb) link; /* toep_list */
u_int flags; /* miscellaneous flags */
@@ -151,6 +159,10 @@ struct toepcb {
struct kaiocb *ddp_queueing;
struct mtx ddp_lock;
+ TAILQ_HEAD(, kaiocb) aiotx_jobq;
+ struct task aiotx_task;
+ bool aiotx_task_active;
+
/* Tx software descriptor */
uint8_t txsd_total;
uint8_t txsd_pidx;
@@ -313,6 +325,8 @@ int do_abort_rpl_synqe(struct sge_iq *,
void t4_offload_socket(struct toedev *, void *, struct socket *);
/* t4_cpl_io.c */
+void aiotx_init_toep(struct toepcb *);
+int t4_aio_queue_aiotx(struct socket *, struct kaiocb *);
void t4_init_cpl_io_handlers(void);
void t4_uninit_cpl_io_handlers(void);
void send_abort_rpl(struct adapter *, struct sge_wrq *, int , int);