git: a7444f807ec4 - main - unix/dgram: use minimal possible socket buffer for PF_UNIX/SOCK_DGRAM
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 24 Jun 2022 16:10:36 UTC
The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=a7444f807ec44ec5dc4db59b155982ae5b2970b0 commit a7444f807ec44ec5dc4db59b155982ae5b2970b0 Author: Gleb Smirnoff <glebius@FreeBSD.org> AuthorDate: 2022-06-24 16:09:11 +0000 Commit: Gleb Smirnoff <glebius@FreeBSD.org> CommitDate: 2022-06-24 16:09:11 +0000 unix/dgram: use minimal possible socket buffer for PF_UNIX/SOCK_DGRAM This change fully splits away PF_UNIX/SOCK_DGRAM from other socket buffer implementations, without any behavior changes. Generic socket implementation is reduced down to one STAILQ and very little code. Reviewed by: markj Differential revision: https://reviews.freebsd.org/D35300 --- sys/kern/uipc_usrreq.c | 202 ++++++++++++++++++++++++++----------------------- sys/sys/sockbuf.h | 8 ++ 2 files changed, 115 insertions(+), 95 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 594cc35af57d..553d0293770c 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -434,7 +434,8 @@ static struct protosw localsw[] = { { .pr_type = SOCK_DGRAM, .pr_domain = &localdomain, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS|PR_CAPATTACH, + .pr_flags = PR_ATOMIC | PR_ADDR |PR_RIGHTS | PR_CAPATTACH | + PR_SOCKBUF, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_dgram }, @@ -528,6 +529,7 @@ uipc_attach(struct socket *so, int proto, struct thread *td) break; case SOCK_DGRAM: + STAILQ_INIT(&so->so_rcv.uxdg_mb); sendspace = unpdg_maxdgram; recvspace = unpdg_recvspace; break; @@ -850,6 +852,14 @@ uipc_detach(struct socket *so) } if (local_unp_rights) taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); + + switch (so->so_type) { + case SOCK_DGRAM: + /* + * Everything should have been unlinked/freed by unp_dispose(). + */ + MPASS(STAILQ_EMPTY(&so->so_rcv.uxdg_mb)); + } } static int @@ -1130,8 +1140,9 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, const struct sockaddr *from; struct socket *so2; struct sockbuf *sb; - struct mbuf *f, *clast; - int cc, error; + struct mbuf *f; + u_int cc; + int error; MPASS((uio != NULL && m == NULL) || (m != NULL && uio == NULL)); @@ -1193,7 +1204,7 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); if (error) goto out2; - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCK_SENDBUF_UNLOCK(so); error = EPIPE; @@ -1202,15 +1213,15 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, if (so->so_error != 0) { error = so->so_error; so->so_error = 0; - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); goto out3; } if (((so->so_state & SS_ISCONNECTED) == 0) && addr == NULL) { - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); error = EDESTADDRREQ; goto out3; } - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); if (addr != NULL) { if ((error = unp_connectat(AT_FDCWD, so, addr, td, true))) @@ -1238,34 +1249,35 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, MPASS(from->sa_len <= MLEN); bcopy(from, mtod(f, void *), from->sa_len); cc = f->m_len + m->m_pkthdr.len; - if (c != NULL) + + /* Concatenate: from -> control -> data. */ + if (c != NULL) { + struct mbuf *clast; + cc += m_length(c, &clast); + f->m_next = c; + clast->m_next = m; + c = NULL; + } else + f->m_next = m; + m = NULL; + so2 = unp2->unp_socket; sb = &so2->so_rcv; - SOCKBUF_LOCK(sb); + SOCK_RECVBUF_LOCK(so2); if (cc <= sbspace(sb)) { - /* Concatenate: from -> control -> data. */ - if (c != NULL) { - f->m_next = c; - clast->m_next = m; - } else - f->m_next = m; - m = f; - /* Reusing f as iterator. */ - for (f = m; f->m_next != NULL; f = f->m_next) - sballoc(sb, f); - sballoc(sb, f); - sb->sb_mbtail = f; - /* SBLINKRECORD */ - if (sb->sb_lastrecord != NULL) - sb->sb_lastrecord->m_nextpkt = m; - else - sb->sb_mb = m; - sb->sb_lastrecord = m; - SBLASTMBUFCHK(sb); - SBLASTRECORDCHK(sb); + STAILQ_INSERT_TAIL(&sb->uxdg_mb, f, m_stailqpkt); + /* XXX: would be nice if m_uiotombuf() returns count. */ + for (; f != NULL ; f = f->m_next) { + if (f->m_type != MT_DATA) + sb->sb_ctl += f->m_len; + sb->sb_mbcnt += MSIZE; + if (f->m_flags & M_EXT) + sb->sb_mbcnt += f->m_ext.ext_size; + } + sb->sb_acc += cc; + sb->sb_ccc += cc; sorwakeup_locked(so2); - f = m = c = NULL; } else { soroverflow_locked(so2); error = (so->so_state & SS_NBIO) ? EAGAIN : ENOBUFS; @@ -1285,7 +1297,7 @@ out2: unp_scan(c, unp_freerights); out: if (f) - m_free(f); + m_freem(f); if (c) m_freem(c); if (m) @@ -1305,18 +1317,15 @@ uipc_peek_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, ssize_t len; int error; - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); - m = so->so_rcv.sb_mb; + m = STAILQ_FIRST(&so->so_rcv.uxdg_mb); KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK); - if ((m = m->m_next) == NULL) { - /* XXXRW: Can this happen? */ - SOCK_IO_RECV_UNLOCK(so); - return (0); - } + m = m->m_next; + KASSERT(m, ("%s: no data or control after soname", __func__)); /* * With MSG_PEEK the control isn't executed, just copied. @@ -1381,82 +1390,60 @@ uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, * Loop blocking while waiting for a datagram. */ SOCK_RECVBUF_LOCK(so); - while ((m = so->so_rcv.sb_mb) == NULL) { + while ((m = STAILQ_FIRST(&so->so_rcv.uxdg_mb)) == NULL) { KASSERT(sbavail(&so->so_rcv) == 0, ("soreceive_dgram: sb_mb NULL but sbavail %u", sbavail(&so->so_rcv))); if (so->so_error) { error = so->so_error; so->so_error = 0; - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (error); } if (so->so_rcv.sb_state & SBS_CANTRCVMORE || uio->uio_resid == 0) { - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (0); } if (nonblock) { - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (EWOULDBLOCK); } - SBLASTRECORDCHK(&so->so_rcv); - SBLASTMBUFCHK(&so->so_rcv); error = sbwait(so, SO_RCV); if (error) { - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (error); } } - SOCKBUF_LOCK_ASSERT(&so->so_rcv); + SOCK_RECVBUF_LOCK_ASSERT(so); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; - SBLASTRECORDCHK(&so->so_rcv); - SBLASTMBUFCHK(&so->so_rcv); if (__predict_false(flags & MSG_PEEK)) return (uipc_peek_dgram(so, psa, uio, controlp, flagsp)); - /* - * Advance the sb_mb, update sb_lastrecord if necessary. - */ - so->so_rcv.sb_mb = m->m_nextpkt; - if (so->so_rcv.sb_mb == NULL) { - KASSERT(so->so_rcv.sb_lastrecord == m, - ("%s: lastrecord != m", __func__)); - so->so_rcv.sb_lastrecord = NULL; - so->so_rcv.sb_mbtail = NULL; - } else if (so->so_rcv.sb_mb->m_nextpkt == NULL) - so->so_rcv.sb_lastrecord = so->so_rcv.sb_mb; - - /* - * Walk 'm's chain and free that many bytes from the socket buffer. - */ - for (m2 = m; m2 != NULL; m2 = m2->m_next) - sbfree(&so->so_rcv, m2); - - /* - * Do a few last checks before we let go of the lock. - */ - SBLASTRECORDCHK(&so->so_rcv); - SBLASTMBUFCHK(&so->so_rcv); - SOCKBUF_UNLOCK(&so->so_rcv); + STAILQ_REMOVE_HEAD(&so->so_rcv.uxdg_mb, m_stailqpkt); + for (m2 = m; m2 != NULL; m2 = m2->m_next) { + if (m2->m_type != MT_DATA) + so->so_rcv.sb_ctl -= m2->m_len; + so->so_rcv.sb_acc -= m2->m_len; + so->so_rcv.sb_ccc -= m2->m_len; + so->so_rcv.sb_mbcnt -= MSIZE; + if (m2->m_flags & M_EXT) + so->so_rcv.sb_mbcnt -= m2->m_ext.ext_size; + } + SOCK_RECVBUF_UNLOCK(so); KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK); m = m_free(m); - - if (m == NULL) { - /* XXXRW: Can this happen? */ - SOCK_IO_RECV_UNLOCK(so); - return (0); - } + KASSERT(m, ("%s: no data or control after soname", __func__)); /* * Packet to copyout() is now in 'm' and it is disconnected from the @@ -2915,6 +2902,28 @@ unp_restore_undead_ref(struct filedescent **fdep, int fdcount) } } +static void +unp_scan_socket(struct socket *so, void (*op)(struct filedescent **, int)) +{ + + SOCK_LOCK_ASSERT(so); + + if (sotounpcb(so)->unp_gcflag & UNPGC_IGNORE_RIGHTS) + return; + + SOCK_RECVBUF_LOCK(so); + switch (so->so_type) { + case SOCK_DGRAM: + unp_scan(STAILQ_FIRST(&so->so_rcv.uxdg_mb), op); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + unp_scan(so->so_rcv.sb_mb, op); + break; + } + SOCK_RECVBUF_UNLOCK(so); +} + static void unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int)) { @@ -2926,22 +2935,13 @@ unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int)) /* * Mark all sockets in our accept queue. */ - TAILQ_FOREACH(soa, &so->sol_comp, so_list) { - if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) - continue; - SOCKBUF_LOCK(&soa->so_rcv); - unp_scan(soa->so_rcv.sb_mb, op); - SOCKBUF_UNLOCK(&soa->so_rcv); - } + TAILQ_FOREACH(soa, &so->sol_comp, so_list) + unp_scan_socket(soa, op); } else { /* * Mark all sockets we reference with RIGHTS. */ - if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) { - SOCKBUF_LOCK(&so->so_rcv); - unp_scan(so->so_rcv.sb_mb, op); - SOCKBUF_UNLOCK(&so->so_rcv); - } + unp_scan_socket(so, op); } SOCK_UNLOCK(so); } @@ -3108,11 +3108,23 @@ unp_dispose(struct socket *so) * Grab our special mbufs before calling sbrelease(). */ SOCK_RECVBUF_LOCK(so); - m = sbcut_locked(sb, sb->sb_ccc); - KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, - ("%s: ccc %u mb %p mbcnt %u", __func__, - sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); - sbrelease_locked(so, SO_RCV); + switch (so->so_type) { + case SOCK_DGRAM: + m = STAILQ_FIRST(&sb->uxdg_mb); + STAILQ_INIT(&sb->uxdg_mb); + /* XXX: our shortened sbrelease() */ + (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, + RLIM_INFINITY); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + m = sbcut_locked(sb, sb->sb_ccc); + KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, + ("%s: ccc %u mb %p mbcnt %u", __func__, + sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); + sbrelease_locked(so, SO_RCV); + break; + } SOCK_RECVBUF_UNLOCK(so); if (SOCK_IO_RECV_OWNED(so)) SOCK_IO_RECV_UNLOCK(so); diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 7800b2790c04..a1fd65d1a9e5 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -133,6 +133,14 @@ struct sockbuf { uint64_t sb_tls_seqno; /* TLS seqno */ struct ktls_session *sb_tls_info; /* TLS state */ }; + /* + * PF_UNIX/SOCK_DGRAM + * + * Local protocol, thus any socket buffer is a receive buffer. + */ + struct { + STAILQ_HEAD(, mbuf) uxdg_mb; + }; }; };