git: 4328318445ae - main - sockets: use socket buffer mutexes in struct socket directly
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 12 May 2022 20:22:51 UTC
The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=4328318445aec29c87f33ebfa11f5253a73a7896 commit 4328318445aec29c87f33ebfa11f5253a73a7896 Author: Gleb Smirnoff <glebius@FreeBSD.org> AuthorDate: 2022-05-12 20:22:12 +0000 Commit: Gleb Smirnoff <glebius@FreeBSD.org> CommitDate: 2022-05-12 20:22:12 +0000 sockets: use socket buffer mutexes in struct socket directly Since c67f3b8b78e the sockbuf mutexes belong to the containing socket, and socket buffers just point to them. In 74a68313b50, macros that access these mutexes directly were added. Go over the core socket code and eliminate code that reaches the mutexes by dereferencing the sockbuf compatibility pointer. This change requires a KPI change, as some functions were given only the sockbuf pointer, without any hint as to whether it is a receive or a send buffer. This change doesn't cover the whole kernel; many protocols still use compatibility pointers internally. However, it allows operation of a protocol that doesn't use them. 
Reviewed by: markj Differential revision: https://reviews.freebsd.org/D35152 --- sys/dev/cxgbe/tom/t4_cpl_io.c | 4 +- sys/dev/cxgbe/tom/t4_ddp.c | 2 +- sys/dev/hyperv/hvsock/hv_sock.c | 4 +- sys/kern/kern_sendfile.c | 2 +- sys/kern/sys_socket.c | 31 +++--- sys/kern/uipc_sockbuf.c | 142 ++++++++++++++++--------- sys/kern/uipc_socket.c | 48 ++++----- sys/kern/uipc_usrreq.c | 2 +- sys/netinet/sctp_output.c | 4 +- sys/netinet/sctputil.c | 4 +- sys/netinet/tcp_input.c | 8 +- sys/netinet/tcp_output.c | 4 +- sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c | 4 +- sys/rpc/clnt_bck.c | 2 +- sys/rpc/clnt_vc.c | 2 +- sys/sys/sockbuf.h | 16 +-- sys/sys/socketvar.h | 61 +++++------ 17 files changed, 191 insertions(+), 149 deletions(-) diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 130c2468b20b..59d1c367f94c 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -803,7 +803,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); - if (!sbreserve_locked(sb, newsize, so, NULL)) + if (!sbreserve_locked(so, SO_SND, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ @@ -1770,7 +1770,7 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); - if (!sbreserve_locked(sb, newsize, so, NULL)) + if (!sbreserve_locked(so, SO_RCV, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index 05bb903a28aa..11fea91b060e 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -555,7 +555,7 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); - if (!sbreserve_locked(sb, newsize, so, NULL)) + if (!sbreserve_locked(so, 
SO_RCV, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } SOCKBUF_UNLOCK(sb); diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c index f0238c233181..8c327a22e6fd 100644 --- a/sys/dev/hyperv/hvsock/hv_sock.c +++ b/sys/dev/hyperv/hvsock/hv_sock.c @@ -763,7 +763,7 @@ hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr, * Wait and block until (more) data comes in. * Note: Drops the sockbuf lock during wait. */ - error = sbwait(sb); + error = sbwait(so, SO_RCV); if (error) break; @@ -859,7 +859,7 @@ hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * Sleep wait until space avaiable to send * Note: Drops the sockbuf lock during wait. */ - error = sbwait(sb); + error = sbwait(so, SO_SND); if (error) break; diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 30383490ca41..2de015254ab9 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -814,7 +814,7 @@ retry_space: * state may have changed and we retest * for it. */ - error = sbwait(&so->so_snd); + error = sbwait(so, SO_SND); /* * An error from sbwait usually indicates that we've * been interrupted by a signal. 
If we've sent anything diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 774b317c6ecb..b1b47d3d3c26 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -179,12 +179,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, so->sol_sbrcv_flags |= SB_ASYNC; so->sol_sbsnd_flags |= SB_ASYNC; } else { - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); so->so_rcv.sb_flags |= SB_ASYNC; - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_LOCK(&so->so_snd); + SOCK_RECVBUF_UNLOCK(so); + SOCK_SENDBUF_LOCK(so); so->so_snd.sb_flags |= SB_ASYNC; - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); } SOCK_UNLOCK(so); } else { @@ -194,12 +194,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, so->sol_sbrcv_flags &= ~SB_ASYNC; so->sol_sbsnd_flags &= ~SB_ASYNC; } else { - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); so->so_rcv.sb_flags &= ~SB_ASYNC; - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_LOCK(&so->so_snd); + SOCK_RECVBUF_UNLOCK(so); + SOCK_SENDBUF_LOCK(so); so->so_snd.sb_flags &= ~SB_ASYNC; - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); } SOCK_UNLOCK(so); } @@ -751,10 +751,12 @@ soaio_snd(void *context, int pending) } void -sowakeup_aio(struct socket *so, struct sockbuf *sb) +sowakeup_aio(struct socket *so, sb_which which) { + struct sockbuf *sb = sobuf(so, which); + + SOCK_BUF_LOCK_ASSERT(so, which); - SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags &= ~SB_AIO; if (sb->sb_flags & SB_AIO_RUNNING) return; @@ -799,6 +801,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job) { struct socket *so; struct sockbuf *sb; + sb_which which; int error; so = fp->f_data; @@ -809,12 +812,14 @@ soo_aio_queue(struct file *fp, struct kaiocb *job) /* Lock through the socket, since this may be a listening socket. 
*/ switch (job->uaiocb.aio_lio_opcode & (LIO_WRITE | LIO_READ)) { case LIO_READ: - sb = &so->so_rcv; SOCK_RECVBUF_LOCK(so); + sb = &so->so_rcv; + which = SO_RCV; break; case LIO_WRITE: - sb = &so->so_snd; SOCK_SENDBUF_LOCK(so); + sb = &so->so_snd; + which = SO_SND; break; default: return (EINVAL); @@ -833,7 +838,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job) TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list); if (!(sb->sb_flags & SB_AIO_RUNNING)) { if (soaio_ready(so, sb)) - sowakeup_aio(so, sb); + sowakeup_aio(so, which); else sb->sb_flags |= SB_AIO; } diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 5ac6c79a928f..421fa5da37d9 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -396,27 +396,27 @@ void socantsendmore_locked(struct socket *so) { - SOCKBUF_LOCK_ASSERT(&so->so_snd); + SOCK_SENDBUF_LOCK_ASSERT(so); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); - mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); + SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantsendmore(struct socket *so) { - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); socantsendmore_locked(so); - mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); + SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantrcvmore_locked(struct socket *so) { - SOCKBUF_LOCK_ASSERT(&so->so_rcv); + SOCK_RECVBUF_LOCK_ASSERT(so); so->so_rcv.sb_state |= SBS_CANTRCVMORE; #ifdef KERN_TLS @@ -424,53 +424,55 @@ socantrcvmore_locked(struct socket *so) ktls_check_rx(&so->so_rcv); #endif sorwakeup_locked(so); - mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); + SOCK_RECVBUF_UNLOCK_ASSERT(so); } void socantrcvmore(struct socket *so) { - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); socantrcvmore_locked(so); - mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); + SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow_locked(struct socket *so) { - SOCKBUF_LOCK_ASSERT(&so->so_rcv); + SOCK_RECVBUF_LOCK_ASSERT(so); if (so->so_options & SO_RERROR) { so->so_rerror = ENOBUFS; 
sorwakeup_locked(so); } else - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); - mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); + SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow(struct socket *so) { - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); soroverflow_locked(so); - mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); + SOCK_RECVBUF_UNLOCK_ASSERT(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int -sbwait(struct sockbuf *sb) +sbwait(struct socket *so, sb_which which) { + struct sockbuf *sb; - SOCKBUF_LOCK_ASSERT(sb); + SOCK_BUF_LOCK_ASSERT(so, which); + sb = sobuf(so, which); sb->sb_flags |= SB_WAIT; - return (msleep_sbt(&sb->sb_acc, SOCKBUF_MTX(sb), + return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which), (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0)); } @@ -487,13 +489,15 @@ sbwait(struct sockbuf *sb) * then release it to avoid lock order issues. It's not clear that's * correct. */ -void -sowakeup(struct socket *so, struct sockbuf *sb) +static __always_inline void +sowakeup(struct socket *so, const sb_which which) { + struct sockbuf *sb; int ret; - SOCKBUF_LOCK_ASSERT(sb); + SOCK_BUF_LOCK_ASSERT(so, which); + sb = sobuf(so, which); selwakeuppri(sb->sb_sel, PSOCK); if (!SEL_WAITING(sb->sb_sel)) sb->sb_flags &= ~SB_SEL; @@ -512,13 +516,43 @@ sowakeup(struct socket *so, struct sockbuf *sb) } else ret = SU_OK; if (sb->sb_flags & SB_AIO) - sowakeup_aio(so, sb); - SOCKBUF_UNLOCK(sb); + sowakeup_aio(so, which); + SOCK_BUF_UNLOCK(so, which); if (ret == SU_ISCONNECTED) soisconnected(so); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); - mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); + SOCK_BUF_UNLOCK_ASSERT(so, which); +} + +/* + * Do we need to notify the other side when I/O is possible? 
+ */ +static __always_inline bool +sb_notify(const struct sockbuf *sb) +{ + return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | + SB_UPCALL | SB_AIO | SB_KNOTE)) != 0); +} + +void +sorwakeup_locked(struct socket *so) +{ + SOCK_RECVBUF_LOCK_ASSERT(so); + if (sb_notify(&so->so_rcv)) + sowakeup(so, SO_RCV); + else + SOCK_RECVBUF_UNLOCK(so); +} + +void +sowwakeup_locked(struct socket *so) +{ + SOCK_SENDBUF_LOCK_ASSERT(so); + if (sb_notify(&so->so_snd)) + sowakeup(so, SO_SND); + else + SOCK_SENDBUF_UNLOCK(so); } /* @@ -557,11 +591,11 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc) { struct thread *td = curthread; - SOCKBUF_LOCK(&so->so_snd); - SOCKBUF_LOCK(&so->so_rcv); - if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) + SOCK_SENDBUF_LOCK(so); + SOCK_RECVBUF_LOCK(so); + if (sbreserve_locked(so, SO_SND, sndcc, td) == 0) goto bad; - if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) + if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; @@ -569,14 +603,14 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_RECVBUF_UNLOCK(so); + SOCK_SENDBUF_UNLOCK(so); return (0); bad2: - sbrelease_locked(&so->so_snd, so); + sbrelease_locked(so, SO_SND); bad: - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_RECVBUF_UNLOCK(so); + SOCK_SENDBUF_UNLOCK(so); return (ENOBUFS); } @@ -600,13 +634,14 @@ sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't * become limiting if buffering efficiency is near the normal case. 
*/ -int -sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, +bool +sbreserve_locked(struct socket *so, sb_which which, u_long cc, struct thread *td) { + struct sockbuf *sb = sobuf(so, which); rlim_t sbsize_limit; - SOCKBUF_LOCK_ASSERT(sb); + SOCK_BUF_LOCK_ASSERT(so, which); /* * When a thread is passed, we take into account the thread's socket @@ -616,24 +651,25 @@ sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, * we don't apply a process limit. */ if (cc > sb_max_adj) - return (0); + return (false); if (td != NULL) { sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); } else sbsize_limit = RLIM_INFINITY; if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, sbsize_limit)) - return (0); + return (false); sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; - return (1); + return (true); } int sbsetopt(struct socket *so, int cmd, u_long cc) { struct sockbuf *sb; + sb_which wh; short *flags; u_int *hiwat, *lowat; int error; @@ -660,16 +696,18 @@ sbsetopt(struct socket *so, int cmd, u_long cc) case SO_SNDLOWAT: case SO_SNDBUF: sb = &so->so_snd; + wh = SO_SND; break; case SO_RCVLOWAT: case SO_RCVBUF: sb = &so->so_rcv; + wh = SO_RCV; break; } flags = &sb->sb_flags; hiwat = &sb->sb_hiwat; lowat = &sb->sb_lowat; - SOCKBUF_LOCK(sb); + SOCK_BUF_LOCK(so, wh); } error = 0; @@ -685,7 +723,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc) if (*lowat > *hiwat) *lowat = *hiwat; } else { - if (!sbreserve_locked(sb, cc, so, curthread)) + if (!sbreserve_locked(so, wh, cc, curthread)) error = ENOBUFS; } if (error == 0) @@ -702,7 +740,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc) } if (!SOLISTENING(so)) - SOCKBUF_UNLOCK(sb); + SOCK_BUF_UNLOCK(so, wh); SOCK_UNLOCK(so); return (error); } @@ -711,8 +749,9 @@ sbsetopt(struct socket *so, int cmd, u_long cc) * Free mbufs held by a socket, and reserved mbuf space. 
*/ static void -sbrelease_internal(struct sockbuf *sb, struct socket *so) +sbrelease_internal(struct socket *so, sb_which which) { + struct sockbuf *sb = sobuf(so, which); sbflush_internal(sb); (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, @@ -721,33 +760,34 @@ sbrelease_internal(struct sockbuf *sb, struct socket *so) } void -sbrelease_locked(struct sockbuf *sb, struct socket *so) +sbrelease_locked(struct socket *so, sb_which which) { - SOCKBUF_LOCK_ASSERT(sb); + SOCK_BUF_LOCK_ASSERT(so, which); - sbrelease_internal(sb, so); + sbrelease_internal(so, which); } void -sbrelease(struct sockbuf *sb, struct socket *so) +sbrelease(struct socket *so, sb_which which) { - SOCKBUF_LOCK(sb); - sbrelease_locked(sb, so); - SOCKBUF_UNLOCK(sb); + SOCK_BUF_LOCK(so, which); + sbrelease_locked(so, which); + SOCK_BUF_UNLOCK(so, which); } void -sbdestroy(struct sockbuf *sb, struct socket *so) +sbdestroy(struct socket *so, sb_which which) { - - sbrelease_internal(sb, so); #ifdef KERN_TLS + struct sockbuf *sb = sobuf(so, which); + if (sb->sb_tls_info != NULL) ktls_free(sb->sb_tls_info); sb->sb_tls_info = NULL; #endif + sbrelease_internal(so, which); } /* diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 628730171715..49a2b5773cc6 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -420,8 +420,8 @@ soalloc(struct vnet *vnet) mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK); so->so_snd.sb_mtx = &so->so_snd_mtx; so->so_rcv.sb_mtx = &so->so_rcv_mtx; - SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); - SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); + mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF); + mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF); so->so_rcv.sb_sel = &so->so_rdsel; so->so_snd.sb_sel = &so->so_wrsel; sx_init(&so->so_snd_sx, "so_snd_sx"); @@ -491,8 +491,8 @@ sodealloc(struct socket *so) &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); sx_destroy(&so->so_snd_sx); sx_destroy(&so->so_rcv_sx); - SOCKBUF_LOCK_DESTROY(&so->so_snd); - 
SOCKBUF_LOCK_DESTROY(&so->so_rcv); + mtx_destroy(&so->so_snd_mtx); + mtx_destroy(&so->so_rcv_mtx); } crfree(so->so_cred); mtx_destroy(&so->so_lock); @@ -990,8 +990,8 @@ solisten_proto(struct socket *so, int backlog) sbrcv_timeo = so->so_rcv.sb_timeo; sbsnd_timeo = so->so_snd.sb_timeo; - sbdestroy(&so->so_snd, so); - sbdestroy(&so->so_rcv, so); + sbdestroy(so, SO_SND); + sbdestroy(so, SO_RCV); #ifdef INVARIANTS bzero(&so->so_rcv, @@ -1208,8 +1208,8 @@ sofree(struct socket *so) * to be acquired or held. */ if (!SOLISTENING(so)) { - sbdestroy(&so->so_snd, so); - sbdestroy(&so->so_rcv, so); + sbdestroy(so, SO_SND); + sbdestroy(so, SO_RCV); } seldrain(&so->so_rdsel); seldrain(&so->so_wrsel); @@ -1735,7 +1735,7 @@ restart: error = EWOULDBLOCK; goto release; } - error = sbwait(&so->so_snd); + error = sbwait(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; @@ -2067,7 +2067,7 @@ restart: } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); - error = sbwait(&so->so_rcv); + error = sbwait(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); if (error) goto release; @@ -2389,7 +2389,7 @@ dontblock: * the protocol. Skip blocking in this case. */ if (so->so_rcv.sb_mb == NULL) { - error = sbwait(&so->so_rcv); + error = sbwait(so, SO_RCV); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; @@ -2570,7 +2570,7 @@ restart: * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. 
*/ - error = sbwait(sb); + error = sbwait(so, SO_RCV); if (error) goto out; goto restart; @@ -2742,7 +2742,7 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); - error = sbwait(&so->so_rcv); + error = sbwait(so, SO_RCV); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); return (error); @@ -2960,7 +2960,7 @@ sorflush(struct socket *so) MPASS(pr->pr_domain->dom_dispose != NULL); (*pr->pr_domain->dom_dispose)(so); } else { - sbrelease(&so->so_rcv, so); + sbrelease(so, SO_RCV); SOCK_IO_RECV_UNLOCK(so); } @@ -3610,8 +3610,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred, } } else { revents = 0; - SOCKBUF_LOCK(&so->so_snd); - SOCKBUF_LOCK(&so->so_rcv); + SOCK_SENDBUF_LOCK(so); + SOCK_RECVBUF_LOCK(so); if (events & (POLLIN | POLLRDNORM)) if (soreadabledata(so)) revents |= events & (POLLIN | POLLRDNORM); @@ -3642,8 +3642,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred, so->so_snd.sb_flags |= SB_SEL; } } - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_RECVBUF_UNLOCK(so); + SOCK_SENDBUF_UNLOCK(so); } SOCK_UNLOCK(so); return (revents); @@ -4297,12 +4297,12 @@ so_rdknl_assert_lock(void *arg, int what) if (SOLISTENING(so)) SOCK_LOCK_ASSERT(so); else - SOCKBUF_LOCK_ASSERT(&so->so_rcv); + SOCK_RECVBUF_LOCK_ASSERT(so); } else { if (SOLISTENING(so)) SOCK_UNLOCK_ASSERT(so); else - SOCKBUF_UNLOCK_ASSERT(&so->so_rcv); + SOCK_RECVBUF_UNLOCK_ASSERT(so); } } @@ -4314,7 +4314,7 @@ so_wrknl_lock(void *arg) if (SOLISTENING(so)) SOCK_LOCK(so); else - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); } static void @@ -4325,7 +4325,7 @@ so_wrknl_unlock(void *arg) if (SOLISTENING(so)) SOCK_UNLOCK(so); else - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); } static void @@ -4337,12 +4337,12 @@ so_wrknl_assert_lock(void *arg, int what) if (SOLISTENING(so)) SOCK_LOCK_ASSERT(so); else - SOCKBUF_LOCK_ASSERT(&so->so_snd); + 
SOCK_SENDBUF_LOCK_ASSERT(so); } else { if (SOLISTENING(so)) SOCK_UNLOCK_ASSERT(so); else - SOCKBUF_UNLOCK_ASSERT(&so->so_snd); + SOCK_SENDBUF_UNLOCK_ASSERT(so); } } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index efa586d346c5..b326dbd825a6 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -2771,7 +2771,7 @@ unp_dispose(struct socket *so) KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, ("%s: ccc %u mb %p mbcnt %u", __func__, sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); - sbrelease_locked(sb, so); + sbrelease_locked(so, SO_RCV); SOCK_RECVBUF_UNLOCK(so); if (SOCK_IO_RECV_OWNED(so)) SOCK_IO_RECV_UNLOCK(so); diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index 9a8927160441..9e2e70313be3 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -12992,7 +12992,7 @@ sctp_lower_sosend(struct socket *so, stcb->block_entry = &be; SCTP_TCB_UNLOCK(stcb); hold_tcblock = false; - error = sbwait(&so->so_snd); + error = sbwait(so, SO_SND); if (error == 0) { if (so->so_error != 0) { error = so->so_error; @@ -13352,7 +13352,7 @@ skip_preblock: stcb->block_entry = &be; SCTP_TCB_UNLOCK(stcb); hold_tcblock = false; - error = sbwait(&so->so_snd); + error = sbwait(so, SO_SND); if (error == 0) { if (so->so_error != 0) error = so->so_error; diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index 8451ed5e2007..e20a49be1adb 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -5640,7 +5640,7 @@ restart_nosblocks: } } if (block_allowed) { - error = sbwait(&so->so_rcv); + error = sbwait(so, SO_RCV); if (error) { goto out; } @@ -6255,7 +6255,7 @@ wait_some_more: goto release; } if (so->so_rcv.sb_cc <= control->held_length) { - error = sbwait(&so->so_rcv); + error = sbwait(so, SO_RCV); if (error) { goto release; } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 695cd5a916db..be86ceca445c 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ 
-1921,8 +1921,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * Give up when limit is reached. */ if (newsize) - if (!sbreserve_locked(&so->so_rcv, - newsize, so, NULL)) + if (!sbreserve_locked(so, SO_RCV, + newsize, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m, 0); @@ -3848,7 +3848,7 @@ tcp_mss(struct tcpcb *tp, int offer) if (bufsize > sb_max) bufsize = sb_max; if (bufsize > so->so_snd.sb_hiwat) - (void)sbreserve_locked(&so->so_snd, bufsize, so, NULL); + (void)sbreserve_locked(so, SO_SND, bufsize, NULL); } SOCKBUF_UNLOCK(&so->so_snd); /* @@ -3871,7 +3871,7 @@ tcp_mss(struct tcpcb *tp, int offer) if (bufsize > sb_max) bufsize = sb_max; if (bufsize > so->so_rcv.sb_hiwat) - (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL); + (void)sbreserve_locked(so, SO_RCV, bufsize, NULL); } SOCKBUF_UNLOCK(&so->so_rcv); diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 299f1d034717..b0dedb1a24d1 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -2166,9 +2166,9 @@ tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin) sbused(&so->so_snd) < V_tcp_autosndbuf_max && sendwin >= (sbused(&so->so_snd) - (tp->snd_nxt - tp->snd_una))) { - if (!sbreserve_locked(&so->so_snd, + if (!sbreserve_locked(so, SO_SND, min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc, - V_tcp_autosndbuf_max), so, curthread)) + V_tcp_autosndbuf_max), curthread)) so->so_snd.sb_flags &= ~SB_AUTOSIZE; } } diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c index c822276185cb..def5edb98983 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c @@ -1142,7 +1142,7 @@ restart: error = EWOULDBLOCK; goto release; } - error = sbwait(&so->so_snd); + error = sbwait(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; @@ -1336,7 
+1336,7 @@ restart: * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ - error = sbwait(sb); + error = sbwait(so, SO_RCV); if (error) goto out; goto restart; diff --git a/sys/rpc/clnt_bck.c b/sys/rpc/clnt_bck.c index 514905bf1cc2..810a957bb97b 100644 --- a/sys/rpc/clnt_bck.c +++ b/sys/rpc/clnt_bck.c @@ -326,7 +326,7 @@ if (error != 0) printf("sosend=%d\n", error); if (error == EMSGSIZE) { printf("emsgsize\n"); SOCKBUF_LOCK(&xprt->xp_socket->so_snd); - sbwait(&xprt->xp_socket->so_snd); + sbwait(xprt->xp_socket, SO_SND); SOCKBUF_UNLOCK(&xprt->xp_socket->so_snd); sx_xunlock(&xprt->xp_lock); AUTH_VALIDATE(auth, xid, NULL, NULL); diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c index dfada2bea388..f565de06f4bd 100644 --- a/sys/rpc/clnt_vc.c +++ b/sys/rpc/clnt_vc.c @@ -447,7 +447,7 @@ call_again: if (error == EMSGSIZE || (error == ERESTART && (ct->ct_waitflag & PCATCH) == 0 && trycnt-- > 0)) { SOCKBUF_LOCK(&ct->ct_socket->so_snd); - sbwait(&ct->ct_socket->so_snd); + sbwait(ct->ct_socket, SO_SND); SOCKBUF_UNLOCK(&ct->ct_socket->so_snd); AUTH_VALIDATE(auth, xid, NULL, NULL); mtx_lock(&ct->ct_lock); diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 372f04eba54c..2484407d557c 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -116,6 +116,9 @@ struct sockbuf { #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ #ifdef _KERNEL +/* 'which' values for KPIs that operate on one buffer of a socket. */ +typedef enum { SO_RCV, SO_SND } sb_which; + /* * Per-socket buffer mutex used to protect most fields in the socket buffer. * These make use of the mutex pointer embedded in struct sockbuf, which @@ -124,9 +127,6 @@ struct sockbuf { * these locking macros. 
*/ #define SOCKBUF_MTX(_sb) ((_sb)->sb_mtx) -#define SOCKBUF_LOCK_INIT(_sb, _name) \ - mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF) -#define SOCKBUF_LOCK_DESTROY(_sb) mtx_destroy(SOCKBUF_MTX(_sb)) #define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb)) #define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb)) #define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb)) @@ -162,7 +162,7 @@ struct mbuf * struct mbuf * sbcreatecontrol_how(void *p, int size, int type, int level, int wait); -void sbdestroy(struct sockbuf *sb, struct socket *so); +void sbdestroy(struct socket *, sb_which); void sbdrop(struct sockbuf *sb, int len); void sbdrop_locked(struct sockbuf *sb, int len); struct mbuf * @@ -171,17 +171,17 @@ void sbdroprecord(struct sockbuf *sb); void sbdroprecord_locked(struct sockbuf *sb); void sbflush(struct sockbuf *sb); void sbflush_locked(struct sockbuf *sb); -void sbrelease(struct sockbuf *sb, struct socket *so); -void sbrelease_locked(struct sockbuf *sb, struct socket *so); +void sbrelease(struct socket *, sb_which); +void sbrelease_locked(struct socket *, sb_which); int sbsetopt(struct socket *so, int cmd, u_long cc); -int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, +bool sbreserve_locked(struct socket *so, sb_which which, u_long cc, struct thread *td); void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len); struct mbuf * sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff); struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff); -int sbwait(struct sockbuf *sb); +int sbwait(struct socket *, sb_which); void sballoc(struct sockbuf *, struct mbuf *); void sbfree(struct sockbuf *, struct mbuf *); void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index fe6faa842bda..05eefd7e4fd4 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -77,8 +77,8 @@ enum socket_qstate { * Locking key to struct socket: * (a) constant after allocation, no locking 
required. * (b) locked by SOCK_LOCK(so). - * (cr) locked by SOCK_RECVBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_rcv). - * (cs) locked by SOCK_SENDBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_snd). + * (cr) locked by SOCK_RECVBUF_LOCK(so) + * (cs) locked by SOCK_SENDBUF_LOCK(so) * (e) locked by SOLISTEN_LOCK() of corresponding listening socket. * (f) not locked since integer reads/writes are atomic. * (g) used only as a sleep/wakeup address, no value. @@ -256,8 +256,8 @@ struct socket { } while (0) /* - * Socket buffer locks. These manipulate the same mutexes as SOCKBUF_LOCK() - * and related macros. + * Socket buffer locks. These are strongly preferred over SOCKBUF_LOCK(sb) + * macros, as we are moving towards protocol specific socket buffers. */ #define SOCK_RECVBUF_MTX(so) \ (&(so)->so_rcv_mtx) @@ -281,8 +281,26 @@ struct socket { #define SOCK_SENDBUF_UNLOCK_ASSERT(so) \ mtx_assert(SOCK_SENDBUF_MTX(so), MA_NOTOWNED) -/* 'which' values for socket buffer events and upcalls. */ -typedef enum { SO_RCV, SO_SND } sb_which; +#define SOCK_BUF_LOCK(so, which) \ + mtx_lock(soeventmtx(so, which)) +#define SOCK_BUF_UNLOCK(so, which) \ + mtx_unlock(soeventmtx(so, which)) +#define SOCK_BUF_LOCK_ASSERT(so, which) \ + mtx_assert(soeventmtx(so, which), MA_OWNED) +#define SOCK_BUF_UNLOCK_ASSERT(so, which) \ + mtx_assert(soeventmtx(so, which), MA_NOTOWNED) + +static inline struct sockbuf * +sobuf(struct socket *so, const sb_which which) +{ + return (which == SO_RCV ? &so->so_rcv : &so->so_snd); +} + +static inline struct mtx * +soeventmtx(struct socket *so, const sb_which which) +{ + return (which == SO_RCV ? SOCK_RECVBUF_MTX(so) : SOCK_SENDBUF_MTX(so)); +} /* * Macros for sockets and socket buffering. *** 57 LINES SKIPPED ***