git: 1093f16487a9 - main - unix/dgram: reduce mbuf chain traversals in send(2) and recv(2)
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 24 Jun 2022 16:10:38 UTC
The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=1093f16487a93c3ecdea910ca7249375873969da commit 1093f16487a93c3ecdea910ca7249375873969da Author: Gleb Smirnoff <glebius@FreeBSD.org> AuthorDate: 2022-06-24 16:09:11 +0000 Commit: Gleb Smirnoff <glebius@FreeBSD.org> CommitDate: 2022-06-24 16:09:11 +0000 unix/dgram: reduce mbuf chain traversals in send(2) and recv(2) o Use m_pkthdr.memlen from m_uiotombuf() o Modify unp_internalize() to keep track of allocated space and memory as well as pointer to the last buffer. o Modify unp_addsockcred() to keep track of allocated space and memory as well as pointer to the last buffer. o Record the datagram len/memlen/ctllen in the first (from) mbuf of the chain in uipc_sosend_dgram() and reuse it in uipc_soreceive_dgram(). Reviewed by: markj Differential revision: https://reviews.freebsd.org/D35302 --- sys/kern/uipc_usrreq.c | 151 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 113 insertions(+), 38 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index d1c87865c632..05fde7675b9f 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -302,11 +302,13 @@ static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int)); static void unp_discard(struct file *); static void unp_freerights(struct filedescent **, int); -static int unp_internalize(struct mbuf **, struct thread *); +static int unp_internalize(struct mbuf **, struct thread *, + struct mbuf **, u_int *, u_int *); static void unp_internalize_fp(struct file *); static int unp_externalize(struct mbuf *, struct mbuf **, int); static int unp_externalize_fp(struct file *); -static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *, int); +static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *, + int, struct mbuf **, u_int *, u_int *); static void unp_process_defers(void * __unused, int); static void @@ -1014,7 +1016,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, error = EOPNOTSUPP; goto release; } - if (control != NULL && (error = unp_internalize(&control, td))) + if (control != NULL && + (error = unp_internalize(&control, td, NULL, NULL, NULL))) goto release; unp2 = NULL; @@ -1051,7 +1054,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS). */ - control = unp_addsockcred(td, control, unp2->unp_flags); + control = unp_addsockcred(td, control, unp2->unp_flags, NULL, + NULL, NULL); unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT; } @@ -1131,7 +1135,13 @@ release: * * Allocate a record consisting of 3 mbufs in the sequence of * from -> control -> data and append it to the socket buffer. + * + * The first mbuf carries sender's name and is a pkthdr that stores + * overall length of datagram, its memory consumption and control length. */ +#define ctllen PH_loc.thirtytwo[1] +_Static_assert(offsetof(struct pkthdr, memlen) + sizeof(u_int) <= + offsetof(struct pkthdr, ctllen), "unix/dgram can not store ctllen"); static int uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *m, struct mbuf *c, int flags, struct thread *td) @@ -1140,14 +1150,16 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, const struct sockaddr *from; struct socket *so2; struct sockbuf *sb; - struct mbuf *f; - u_int cc; + struct mbuf *f, *clast; + u_int cc, ctl, mbcnt; + u_int dcc __diagused, dctl __diagused, dmbcnt __diagused; int error; MPASS((uio != NULL && m == NULL) || (m != NULL && uio == NULL)); error = 0; f = NULL; + ctl = 0; if (__predict_false(flags & MSG_OOB)) { error = EOPNOTSUPP; @@ -1163,8 +1175,11 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, error = EFAULT; goto out; } - f = m_get(M_WAITOK, MT_SONAME); - if (c != NULL && (error = unp_internalize(&c, td))) + f = m_gethdr(M_WAITOK, MT_SONAME); + cc = m->m_pkthdr.len; + mbcnt = MSIZE + m->m_pkthdr.memlen; + if (c != NULL && + (error = unp_internalize(&c, td, &clast, &ctl, &mbcnt))) goto out; } else { /* pru_sosend() with mbuf usually is a kernel thread. */ @@ -1177,7 +1192,7 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, error = EMSGSIZE; goto out; } - if ((f = m_get(M_NOWAIT, MT_SONAME)) == NULL) { + if ((f = m_gethdr(M_NOWAIT, MT_SONAME)) == NULL) { error = ENOBUFS; goto out; } @@ -1189,6 +1204,14 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, m->m_pkthdr.csum_flags = 0; m->m_pkthdr.fibnum = 0; m->m_pkthdr.rsstype = 0; + + cc = m->m_pkthdr.len; + mbcnt = MSIZE; + for (struct mbuf *mb = m; mb != NULL; mb = mb->m_next) { + mbcnt += MSIZE; + if (mb->m_flags & M_EXT) + mbcnt += mb->m_ext.ext_size; + } } unp = sotounpcb(so); @@ -1240,7 +1263,8 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, } if (unp2->unp_flags & UNP_WANTCRED_MASK) - c = unp_addsockcred(td, c, unp2->unp_flags); + c = unp_addsockcred(td, c, unp2->unp_flags, &clast, &ctl, + &mbcnt); if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; else @@ -1248,36 +1272,55 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, f->m_len = from->sa_len; MPASS(from->sa_len <= MLEN); bcopy(from, mtod(f, void *), from->sa_len); - cc = f->m_len + m->m_pkthdr.len; + ctl += f->m_len; - /* Concatenate: from -> control -> data. */ + /* + * Concatenate mbufs: from -> control -> data. + * Save overall cc and mbcnt in "from" mbuf. + */ if (c != NULL) { - struct mbuf *clast; +#ifdef INVARIANTS + struct mbuf *mc; - cc += m_length(c, &clast); + for (mc = c; mc->m_next != NULL; mc = mc->m_next); + MPASS(mc == clast); +#endif f->m_next = c; clast->m_next = m; c = NULL; } else f->m_next = m; m = NULL; +#ifdef INVARIANTS + dcc = dctl = dmbcnt = 0; + for (struct mbuf *mb = f; mb != NULL; mb = mb->m_next) { + if (mb->m_type == MT_DATA) + dcc += mb->m_len; + else + dctl += mb->m_len; + dmbcnt += MSIZE; + if (mb->m_flags & M_EXT) + dmbcnt += mb->m_ext.ext_size; + } + MPASS(dcc == cc); + MPASS(dctl == ctl); + MPASS(dmbcnt == mbcnt); +#endif + f->m_pkthdr.len = cc + ctl; + f->m_pkthdr.memlen = mbcnt; + f->m_pkthdr.ctllen = ctl; so2 = unp2->unp_socket; sb = &so2->so_rcv; SOCK_RECVBUF_LOCK(so2); if (cc <= sbspace(sb)) { STAILQ_INSERT_TAIL(&sb->uxdg_mb, f, m_stailqpkt); - /* XXX: would be nice if m_uiotombuf() returns count. */ - for (; f != NULL; f = f->m_next) { - if (f->m_type != MT_DATA) - sb->sb_ctl += f->m_len; - sb->sb_mbcnt += MSIZE; - if (f->m_flags & M_EXT) - sb->sb_mbcnt += f->m_ext.ext_size; - } - sb->sb_acc += cc; - sb->sb_ccc += cc; + sb->sb_acc += cc + ctl; + sb->sb_ccc += cc + ctl; + sb->sb_ctl += ctl; + sb->sb_mbcnt += mbcnt; sorwakeup_locked(so2); + f = NULL; } else { soroverflow_locked(so2); error = (so->so_state & SS_NBIO) ? EAGAIN : ENOBUFS; @@ -1366,7 +1409,7 @@ static int uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { - struct mbuf *m, *m2; + struct mbuf *m; int flags, error; ssize_t len; bool nonblock; @@ -1419,7 +1462,9 @@ uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, return (error); } } - SOCK_RECVBUF_LOCK_ASSERT(so); + + M_ASSERTPKTHDR(m); + KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; @@ -1428,18 +1473,12 @@ uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, return (uipc_peek_dgram(so, psa, uio, controlp, flagsp)); STAILQ_REMOVE_HEAD(&so->so_rcv.uxdg_mb, m_stailqpkt); - for (m2 = m; m2 != NULL; m2 = m2->m_next) { - if (m2->m_type != MT_DATA) - so->so_rcv.sb_ctl -= m2->m_len; - so->so_rcv.sb_acc -= m2->m_len; - so->so_rcv.sb_ccc -= m2->m_len; - so->so_rcv.sb_mbcnt -= MSIZE; - if (m2->m_flags & M_EXT) - so->so_rcv.sb_mbcnt -= m2->m_ext.ext_size; - } + so->so_rcv.sb_acc -= m->m_pkthdr.len; + so->so_rcv.sb_ccc -= m->m_pkthdr.len; + so->so_rcv.sb_ctl -= m->m_pkthdr.ctllen; + so->so_rcv.sb_mbcnt -= m->m_pkthdr.memlen; SOCK_RECVBUF_UNLOCK(so); - KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK); m = m_free(m); @@ -2510,7 +2549,8 @@ unp_internalize_cleanup_rights(struct mbuf *control) } static int -unp_internalize(struct mbuf **controlp, struct thread *td) +unp_internalize(struct mbuf **controlp, struct thread *td, + struct mbuf **clast, u_int *space, u_int *mbcnt) { struct mbuf *control, **initial_controlp; struct proc *p; @@ -2527,6 +2567,7 @@ unp_internalize(struct mbuf **controlp, struct thread *td) int i, j, error, *fdp, oldfds; u_int newlen; + MPASS((*controlp)->m_next == NULL); /* COMPAT_OLDSOCK may violate */ UNP_LINK_UNLOCK_ASSERT(); p = td->td_proc; @@ -2672,6 +2713,13 @@ unp_internalize(struct mbuf **controlp, struct thread *td) goto out; } + if (space != NULL) { + *space += (*controlp)->m_len; + *mbcnt += MSIZE; + if ((*controlp)->m_flags & M_EXT) + *mbcnt += (*controlp)->m_ext.ext_size; + *clast = *controlp; + } controlp = &(*controlp)->m_next; } if (clen > 0) @@ -2685,7 +2733,8 @@ out: } static struct mbuf * -unp_addsockcred(struct thread *td, struct mbuf *control, int mode) +unp_addsockcred(struct thread *td, struct mbuf *control, int mode, + struct mbuf **clast, u_int *space, u_int *mbcnt) { struct mbuf *m, *n, *n_prev; const struct cmsghdr *cm; @@ -2704,6 +2753,7 @@ unp_addsockcred(struct thread *td, struct mbuf *control, int mode) m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET, M_NOWAIT); if (m == NULL) return (control); + MPASS((m->m_flags & M_EXT) == 0 && m->m_next == NULL); if (mode & UNP_WANTCRED_ALWAYS) { struct sockcred2 *sc; @@ -2745,6 +2795,25 @@ unp_addsockcred(struct thread *td, struct mbuf *control, int mode) control = n->m_next; else n_prev->m_next = n->m_next; + if (space != NULL) { + MPASS(*space >= n->m_len); + *space -= n->m_len; + MPASS(*mbcnt >= MSIZE); + *mbcnt -= MSIZE; + if (n->m_flags & M_EXT) { + MPASS(*mbcnt >= + n->m_ext.ext_size); + *mbcnt -= n->m_ext.ext_size; + } + MPASS(clast); + if (*clast == n) { + MPASS(n->m_next == NULL); + if (n_prev == NULL) + *clast = m; + else + *clast = n_prev; + } + } n = m_free(n); } else { n_prev = n; @@ -2754,6 +2823,12 @@ unp_addsockcred(struct thread *td, struct mbuf *control, int mode) /* Prepend it to the head. */ m->m_next = control; + if (space != NULL) { + *space += m->m_len; + *mbcnt += MSIZE; + if (control == NULL) + *clast = m; + } return (m); }