svn commit: r365764 - in head/sys: kern sys
Mark Johnston
markj at FreeBSD.org
Tue Sep 15 19:23:23 UTC 2020
Author: markj
Date: Tue Sep 15 19:23:22 2020
New Revision: 365764
URL: https://svnweb.freebsd.org/changeset/base/365764
Log:
Simplify unix socket connection peer locking.
unp_pcb_owned_lock2() has some sharp edges and forces callers to deal
with a bunch of cases. Simplify it:
- Rename to unp_pcb_lock_peer().
- Return the connected peer instead of forcing callers to load it
beforehand.
- Handle self-connected sockets.
- In unp_connectat(), just lock the accept socket directly. It should
not be possible for the nascent socket to participate in any other
lock orders.
- Get rid of connect_internal(). It does not provide any useful
checking anymore.
- Block in unp_connectat() when a different thread is concurrently
attempting to lock both sides of a connection. This provides simpler
semantics for callers of unp_pcb_lock_peer().
- Make unp_connectat() return EISCONN if the socket is already
connected. This fixes a race[1] when multiple threads attempt to
connect() to different addresses using the same datagram socket.
Upper layers will disconnect a connected datagram socket before
calling the protocol connect's method, but there is no synchronization
between this and protocol-layer code.
Reported by: syzkaller [1]
Tested by: pho
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D26299
Modified:
head/sys/kern/uipc_usrreq.c
head/sys/sys/unpcb.h
Modified: head/sys/kern/uipc_usrreq.c
==============================================================================
--- head/sys/kern/uipc_usrreq.c Tue Sep 15 19:23:01 2020 (r365763)
+++ head/sys/kern/uipc_usrreq.c Tue Sep 15 19:23:22 2020 (r365764)
@@ -279,6 +279,7 @@ static struct mtx unp_defers_lock;
"unp", "unp", \
MTX_DUPOK|MTX_DEF)
#define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx)
+#define UNP_PCB_LOCKPTR(unp) (&(unp)->unp_mtx)
#define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx)
#define UNP_PCB_TRYLOCK(unp) mtx_trylock(&(unp)->unp_mtx)
#define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx)
@@ -368,35 +369,55 @@ unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *u
UNP_PCB_UNLOCK(unp2);
}
-static __noinline void
-unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p,
- int *freed)
+/*
+ * Try to lock the connected peer of an already locked socket. In some cases
+ * this requires that we unlock the current socket. The pairbusy counter is
+ * used to block concurrent connection attempts while the lock is dropped. The
+ * caller must be careful to revalidate PCB state.
+ */
+static struct unpcb *
+unp_pcb_lock_peer(struct unpcb *unp)
{
struct unpcb *unp2;
- unp2 = *unp2p;
+ UNP_PCB_LOCK_ASSERT(unp);
+ unp2 = unp->unp_conn;
+ if (__predict_false(unp2 == NULL))
+ return (NULL);
+ if (__predict_false(unp == unp2))
+ return (unp);
+
+ UNP_PCB_UNLOCK_ASSERT(unp2);
+
+ if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
+ return (unp2);
+ if ((uintptr_t)unp2 > (uintptr_t)unp) {
+ UNP_PCB_LOCK(unp2);
+ return (unp2);
+ }
+ unp->unp_pairbusy++;
unp_pcb_hold(unp2);
UNP_PCB_UNLOCK(unp);
+
UNP_PCB_LOCK(unp2);
UNP_PCB_LOCK(unp);
- *freed = unp_pcb_rele(unp2);
- if (*freed)
- *unp2p = NULL;
+ KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
+ ("%s: socket %p was reconnected", __func__, unp));
+ if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
+ unp->unp_flags &= ~UNP_WAITING;
+ wakeup(unp);
+ }
+ if (unp_pcb_rele(unp2)) {
+ /* unp2 is unlocked. */
+ return (NULL);
+ }
+ if (unp->unp_conn == NULL) {
+ UNP_PCB_UNLOCK(unp2);
+ return (NULL);
+ }
+ return (unp2);
}
-#define unp_pcb_owned_lock2(unp, unp2, freed) do { \
- freed = 0; \
- UNP_PCB_LOCK_ASSERT(unp); \
- UNP_PCB_UNLOCK_ASSERT(unp2); \
- MPASS((unp) != (unp2)); \
- if (__predict_true(UNP_PCB_TRYLOCK(unp2))) \
- break; \
- else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \
- UNP_PCB_LOCK(unp2); \
- else \
- unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \
-} while (0)
-
/*
* Definitions of protocols supported in the LOCAL domain.
*/
@@ -710,7 +731,7 @@ uipc_close(struct socket *so)
struct unpcb *unp, *unp2;
struct vnode *vp = NULL;
struct mtx *vplock;
- int freed;
+
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
@@ -728,15 +749,10 @@ uipc_close(struct socket *so)
VOP_UNP_DETACH(vp);
unp->unp_vnode = NULL;
}
- unp2 = unp->unp_conn;
- if (__predict_false(unp == unp2)) {
+ if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
unp_disconnect(unp, unp2);
- } else if (unp2 != NULL) {
- unp_pcb_owned_lock2(unp, unp2, freed);
- unp_disconnect(unp, unp2);
- } else {
+ else
UNP_PCB_UNLOCK(unp);
- }
if (vp) {
mtx_unlock(vplock);
vrele(vp);
@@ -765,14 +781,13 @@ uipc_detach(struct socket *so)
struct unpcb *unp, *unp2;
struct mtx *vplock;
struct vnode *vp;
- int freeunp, local_unp_rights;
+ int local_unp_rights;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
vp = NULL;
vplock = NULL;
- local_unp_rights = 0;
SOCK_LOCK(so);
if (!SOLISTENING(so)) {
@@ -811,21 +826,11 @@ uipc_detach(struct socket *so)
VOP_UNP_DETACH(vp);
unp->unp_vnode = NULL;
}
- if (__predict_false(unp == unp->unp_conn)) {
- unp_disconnect(unp, unp);
- unp2 = NULL;
- } else {
- if ((unp2 = unp->unp_conn) != NULL) {
- unp_pcb_owned_lock2(unp, unp2, freeunp);
- if (freeunp)
- unp2 = NULL;
- }
- unp_pcb_hold(unp);
- if (unp2 != NULL)
- unp_disconnect(unp, unp2);
- else
- UNP_PCB_UNLOCK(unp);
- }
+ if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
+ unp_disconnect(unp, unp2);
+ else
+ UNP_PCB_UNLOCK(unp);
+
UNP_REF_LIST_LOCK();
while (!LIST_EMPTY(&unp->unp_refs)) {
struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
@@ -838,11 +843,9 @@ uipc_detach(struct socket *so)
unp_drop(ref);
UNP_REF_LIST_LOCK();
}
-
UNP_REF_LIST_UNLOCK();
+
UNP_PCB_LOCK(unp);
- freeunp = unp_pcb_rele(unp);
- MPASS(freeunp == 0);
local_unp_rights = unp_rights;
unp->unp_socket->so_pcb = NULL;
unp->unp_socket = NULL;
@@ -862,24 +865,15 @@ static int
uipc_disconnect(struct socket *so)
{
struct unpcb *unp, *unp2;
- int freed;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
UNP_PCB_LOCK(unp);
- if ((unp2 = unp->unp_conn) == NULL) {
+ if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
+ unp_disconnect(unp, unp2);
+ else
UNP_PCB_UNLOCK(unp);
- return (0);
- }
- if (__predict_true(unp != unp2)) {
- unp_pcb_owned_lock2(unp, unp2, freed);
- if (__predict_false(freed)) {
- UNP_PCB_UNLOCK(unp);
- return (0);
- }
- }
- unp_disconnect(unp, unp2);
return (0);
}
@@ -998,27 +992,6 @@ uipc_rcvd(struct socket *so, int flags)
}
static int
-connect_internal(struct socket *so, struct sockaddr *nam, struct thread *td)
-{
- int error;
- struct unpcb *unp;
-
- unp = so->so_pcb;
- if (unp->unp_conn != NULL)
- return (EISCONN);
- error = unp_connect(so, nam, td);
- if (error)
- return (error);
- UNP_PCB_LOCK(unp);
- if (unp->unp_conn == NULL) {
- UNP_PCB_UNLOCK(unp);
- if (error == 0)
- error = ENOTCONN;
- }
- return (error);
-}
-
-static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
struct mbuf *control, struct thread *td)
{
@@ -1048,57 +1021,26 @@ uipc_send(struct socket *so, int flags, struct mbuf *m
const struct sockaddr *from;
if (nam != NULL) {
- /*
- * We return with UNP_PCB_LOCK_HELD so we know that
- * the reference is live if the pointer is valid.
- */
- if ((error = connect_internal(so, nam, td)))
+ error = unp_connect(so, nam, td);
+ if (error != 0)
break;
- MPASS(unp->unp_conn != NULL);
- unp2 = unp->unp_conn;
- } else {
- UNP_PCB_LOCK(unp);
-
- /*
- * Because connect() and send() are non-atomic in a sendto()
- * with a target address, it's possible that the socket will
- * have disconnected before the send() can run. In that case
- * return the slightly counter-intuitive but otherwise
- * correct error that the socket is not connected.
- */
- if ((unp2 = unp->unp_conn) == NULL) {
- UNP_PCB_UNLOCK(unp);
- error = ENOTCONN;
- break;
- }
}
- if (__predict_false(unp == unp2)) {
- if (unp->unp_socket == NULL) {
- error = ENOTCONN;
- break;
- }
- goto connect_self;
- }
- unp_pcb_owned_lock2(unp, unp2, freed);
- if (__predict_false(freed)) {
- UNP_PCB_UNLOCK(unp);
- error = ENOTCONN;
- break;
- }
+ UNP_PCB_LOCK(unp);
+
/*
- * The socket referencing unp2 may have been closed
- * or unp may have been disconnected if the unp lock
- * was dropped to acquire unp2.
+ * Because connect() and send() are non-atomic in a sendto()
+ * with a target address, it's possible that the socket will
+ * have disconnected before the send() can run. In that case
+ * return the slightly counter-intuitive but otherwise
+ * correct error that the socket is not connected.
*/
- if (__predict_false(unp->unp_conn == NULL) ||
- unp2->unp_socket == NULL) {
+ unp2 = unp_pcb_lock_peer(unp);
+ if (unp2 == NULL) {
UNP_PCB_UNLOCK(unp);
- if (unp_pcb_rele(unp2) == 0)
- UNP_PCB_UNLOCK(unp2);
error = ENOTCONN;
break;
}
- connect_self:
+
if (unp2->unp_flags & UNP_WANTCRED)
control = unp_addsockcred(td, control);
if (unp->unp_addr != NULL)
@@ -1127,36 +1069,26 @@ uipc_send(struct socket *so, int flags, struct mbuf *m
case SOCK_STREAM:
if ((so->so_state & SS_ISCONNECTED) == 0) {
if (nam != NULL) {
- error = connect_internal(so, nam, td);
+ error = unp_connect(so, nam, td);
if (error != 0)
break;
} else {
error = ENOTCONN;
break;
}
- } else {
- UNP_PCB_LOCK(unp);
}
- if ((unp2 = unp->unp_conn) == NULL) {
+ UNP_PCB_LOCK(unp);
+ if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
UNP_PCB_UNLOCK(unp);
error = ENOTCONN;
break;
} else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
- UNP_PCB_UNLOCK(unp);
+ unp_pcb_unlock_pair(unp, unp2);
error = EPIPE;
break;
- } else if ((unp2 = unp->unp_conn) == NULL) {
- UNP_PCB_UNLOCK(unp);
- error = ENOTCONN;
- break;
}
- unp_pcb_owned_lock2(unp, unp2, freed);
UNP_PCB_UNLOCK(unp);
- if (__predict_false(freed)) {
- error = ENOTCONN;
- break;
- }
if ((so2 = unp2->unp_socket) == NULL) {
UNP_PCB_UNLOCK(unp2);
error = ENOTCONN;
@@ -1291,30 +1223,19 @@ uipc_ready(struct socket *so, struct mbuf *m, int coun
("%s: unexpected socket type for %p", __func__, so));
UNP_PCB_LOCK(unp);
- if ((unp2 = unp->unp_conn) == NULL) {
+ if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
UNP_PCB_UNLOCK(unp);
- goto search;
+ so2 = unp2->unp_socket;
+ SOCKBUF_LOCK(&so2->so_rcv);
+ if ((error = sbready(&so2->so_rcv, m, count)) == 0)
+ sorwakeup_locked(so2);
+ else
+ SOCKBUF_UNLOCK(&so2->so_rcv);
+ UNP_PCB_UNLOCK(unp2);
+ return (error);
}
- if (unp != unp2) {
- if (UNP_PCB_TRYLOCK(unp2) == 0) {
- unp_pcb_hold(unp2);
- UNP_PCB_UNLOCK(unp);
- UNP_PCB_LOCK(unp2);
- if (unp_pcb_rele(unp2))
- goto search;
- } else
- UNP_PCB_UNLOCK(unp);
- }
- so2 = unp2->unp_socket;
- SOCKBUF_LOCK(&so2->so_rcv);
- if ((error = sbready(&so2->so_rcv, m, count)) == 0)
- sorwakeup_locked(so2);
- else
- SOCKBUF_UNLOCK(&so2->so_rcv);
- UNP_PCB_UNLOCK(unp2);
- return (error);
+ UNP_PCB_UNLOCK(unp);
-search:
/*
* The receiving socket has been disconnected, but may still be valid.
* In this case, the now-ready mbufs are still present in its socket
@@ -1566,7 +1487,7 @@ unp_connectat(int fd, struct socket *so, struct sockad
char buf[SOCK_MAXADDRLEN];
struct sockaddr *sa;
cap_rights_t rights;
- int error, len, freed;
+ int error, len;
bool connreq;
if (nam->sa_family != AF_UNIX)
@@ -1582,9 +1503,33 @@ unp_connectat(int fd, struct socket *so, struct sockad
unp = sotounpcb(so);
UNP_PCB_LOCK(unp);
- if (unp->unp_flags & UNP_CONNECTING) {
- UNP_PCB_UNLOCK(unp);
- return (EALREADY);
+ for (;;) {
+ /*
+ * Wait for connection state to stabilize. If a connection
+ * already exists, give up. For datagram sockets, which permit
+ * multiple consecutive connect(2) calls, upper layers are
+ * responsible for disconnecting in advance of a subsequent
+ * connect(2), but this is not synchronized with PCB connection
+ * state.
+ *
+ * Also make sure that no threads are currently attempting to
+ * lock the peer socket, to ensure that unp_conn cannot
+ * transition between two valid sockets while locks are dropped.
+ */
+ if (unp->unp_conn != NULL) {
+ UNP_PCB_UNLOCK(unp);
+ return (EISCONN);
+ }
+ if ((unp->unp_flags & UNP_CONNECTING) != 0) {
+ UNP_PCB_UNLOCK(unp);
+ return (EALREADY);
+ }
+ if (unp->unp_pairbusy > 0) {
+ unp->unp_flags |= UNP_WAITING;
+ mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
+ continue;
+ }
+ break;
}
unp->unp_flags |= UNP_CONNECTING;
UNP_PCB_UNLOCK(unp);
@@ -1657,12 +1602,12 @@ unp_connectat(int fd, struct socket *so, struct sockad
UNP_PCB_UNLOCK(unp2);
unp2 = unp3;
- unp_pcb_owned_lock2(unp2, unp, freed);
- if (__predict_false(freed)) {
- UNP_PCB_UNLOCK(unp2);
- error = ECONNREFUSED;
- goto bad2;
- }
+
+ /*
+ * It is safe to block on the PCB lock here since unp2 is
+ * nascent and cannot be connected to any other sockets.
+ */
+ UNP_PCB_LOCK(unp);
#ifdef MAC
mac_socketpeer_set_from_socket(so, so2);
mac_socketpeer_set_from_socket(so2, so);
@@ -1683,6 +1628,8 @@ bad:
}
free(sa, M_SONAME);
UNP_PCB_LOCK(unp);
+ KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
+ ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
unp->unp_flags &= ~UNP_CONNECTING;
UNP_PCB_UNLOCK(unp);
return (error);
@@ -1722,6 +1669,8 @@ unp_connect2(struct socket *so, struct socket *so2, in
UNP_PCB_LOCK_ASSERT(unp);
UNP_PCB_LOCK_ASSERT(unp2);
+ KASSERT(unp->unp_conn == NULL,
+ ("%s: socket %p is already connected", __func__, unp));
if (so2->so_type != so->so_type)
return (EPROTOTYPE);
@@ -1738,6 +1687,8 @@ unp_connect2(struct socket *so, struct socket *so2, in
case SOCK_STREAM:
case SOCK_SEQPACKET:
+ KASSERT(unp2->unp_conn == NULL,
+ ("%s: socket %p is already connected", __func__, unp2));
unp2->unp_conn = unp;
if (req == PRU_CONNECT &&
((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
@@ -1992,25 +1943,18 @@ unp_drop(struct unpcb *unp)
{
struct socket *so = unp->unp_socket;
struct unpcb *unp2;
- int freed;
/*
* Regardless of whether the socket's peer dropped the connection
* with this socket by aborting or disconnecting, POSIX requires
* that ECONNRESET is returned.
*/
- /* acquire a reference so that unp isn't freed from underneath us */
UNP_PCB_LOCK(unp);
if (so)
so->so_error = ECONNRESET;
- unp2 = unp->unp_conn;
- /* Last reference dropped in unp_disconnect(). */
- if (unp2 == unp) {
- unp_pcb_rele_notlast(unp);
- unp_disconnect(unp, unp2);
- } else if (unp2 != NULL) {
- unp_pcb_owned_lock2(unp, unp2, freed);
+ if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
+ /* Last reference dropped in unp_disconnect(). */
unp_pcb_rele_notlast(unp);
unp_disconnect(unp, unp2);
} else if (!unp_pcb_rele(unp)) {
Modified: head/sys/sys/unpcb.h
==============================================================================
--- head/sys/sys/unpcb.h Tue Sep 15 19:23:01 2020 (r365763)
+++ head/sys/sys/unpcb.h Tue Sep 15 19:23:22 2020 (r365764)
@@ -85,6 +85,7 @@ struct unpcb {
struct sockaddr_un *unp_addr; /* (p) bound address of socket */
struct socket *unp_socket; /* (c) pointer back to socket */
/* Cache line 2 */
+ u_int unp_pairbusy; /* (p) threads acquiring peer locks */
struct vnode *unp_vnode; /* (p) associated file if applicable */
struct xucred unp_peercred; /* (p) peer credentials if applicable */
LIST_ENTRY(unpcb) unp_reflink; /* (l) link in unp_refs list */
@@ -117,6 +118,7 @@ struct unpcb {
*/
#define UNP_CONNECTING 0x010 /* Currently connecting. */
#define UNP_BINDING 0x020 /* Currently binding. */
+#define UNP_WAITING 0x040 /* Peer state is changing. */
/*
* Flags in unp_gcflag.
More information about the svn-src-all
mailing list