git: 5bba2728079e - main - sockets: make pr_shutdown fully protocol specific method

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Tue, 16 Jan 2024 18:31:34 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=5bba2728079ed4da33f727dbc2b6ae1de02ba897

commit 5bba2728079ed4da33f727dbc2b6ae1de02ba897
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2024-01-16 18:26:10 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2024-01-16 18:30:37 +0000

    sockets: make pr_shutdown fully protocol specific method
    
    Disassemble the one-for-all soshutdown() into protocol-specific methods.
    This creates a small amount of copy & paste, but makes the code a lot
    more self-documenting, as each protocol-specific method executes only
    the code that is relevant to that protocol and nothing else.  This also
    fixes a couple of recent regressions and reduces the risk of future
    regressions.  The extended KPI for the new pr_shutdown removes the need
    for the extra pr_flush, which was added for the sake of SCTP, which
    could not perform its shutdown properly with the old one.  For SCTP in
    particular, this change streamlines a lot of code.
    
    Some notes on why certain parts of the code were or were not copied to
    certain protocols:
    * The (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING) check is
      needed only for those protocols that may be connected or disconnected.
    * The above reduces into only SS_ISCONNECTED for those protocols that
      always connect instantly.
    * The ENOTCONN and continue processing hack is left only for datagram
      protocols.
    * The SOLISTENING(so) block is copied to those protocols that listen(2).
    * sorflush() on SHUT_RD is copied to almost every protocol, but that
      will be refactored later.
    * wakeup(&so->so_timeo) is copied to protocols that can make a non-instant
      connect(2), can SO_LINGER or can accept(2).
    
    There are three protocols (netgraph(4), Bluetooth, SDP) that did not have
    pr_shutdown, but the old soshutdown() would still perform sorflush() on
    SHUT_RD for them and also wakeup(9).  Those protocols partially supported
    shutdown(2), returning EOPNOTSUPP for SHUT_WR/SHUT_RDWR; now they have
    lost shutdown(2) support entirely.  I'm pretty sure netgraph(4) and
    Bluetooth are okay with that, and SDP is almost abandoned anyway.
    
    Reviewed by:            tuexen
    Differential Revision:  https://reviews.freebsd.org/D43413
---
 sys/dev/hyperv/hvsock/hv_sock.c |  46 +++++-----
 sys/dev/hyperv/hvsock/hv_sock.h |   2 +-
 sys/kern/uipc_domain.c          |   2 +-
 sys/kern/uipc_socket.c          |  53 ++---------
 sys/kern/uipc_usrreq.c          |  65 ++++++++++++--
 sys/net/rtsock.c                |  16 +++-
 sys/netinet/raw_ip.c            |  25 ++++--
 sys/netinet/sctp_usrreq.c       | 195 +++++++++++++++++++---------------------
 sys/netinet/sctp_var.h          |   2 +-
 sys/netinet/tcp_usrreq.c        |  64 +++++++++----
 sys/netinet/udp_usrreq.c        |  42 +++++++--
 sys/netinet/udp_var.h           |   2 +-
 sys/netinet6/raw_ip6.c          |  25 ++++--
 sys/netinet6/sctp6_usrreq.c     |   1 -
 sys/sys/protosw.h               |   5 +-
 sys/sys/socket.h                |   8 --
 16 files changed, 313 insertions(+), 240 deletions(-)

diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c
index df6f58f6fcb6..8072765f2d5b 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.c
+++ b/sys/dev/hyperv/hvsock/hv_sock.c
@@ -978,43 +978,43 @@ hvs_trans_abort(struct socket *so)
 }
 
 int
-hvs_trans_shutdown(struct socket *so)
+hvs_trans_shutdown(struct socket *so, enum shutdown_how how)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
-	struct sockbuf *sb;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
 
+	SOCK_LOCK(so);
+	if ((so->so_state &
+	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
+		SOCK_UNLOCK(so);
+		return (ENOTCONN);
+	}
+	SOCK_UNLOCK(so);
+
 	if (pcb == NULL)
 		return (EINVAL);
 
-	/*
-	 * Only get called with the shutdown method is SHUT_WR or
-	 * SHUT_RDWR.
-	 * When the method is SHUT_RD or SHUT_RDWR, the caller
-	 * already set the SBS_CANTRCVMORE on receive side socket
-	 * buffer.
-	 */
-	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
-		/*
-		 * SHUT_WR only case.
-		 * Receive side is still open. Just close
-		 * the send side.
-		 */
-		socantsendmore(so);
-	} else {
-		/* SHUT_RDWR case */
+	switch (how) {
+	case SHUT_RD:
+		socantrcvmore(so);
+		break;
+	case SHUT_RDWR:
+		socantrcvmore(so);
 		if (so->so_state & SS_ISCONNECTED) {
 			/* Send a FIN to peer */
-			sb = &so->so_snd;
-			SOCKBUF_LOCK(sb);
-			(void) hvsock_send_data(pcb->chan, NULL, 0, sb);
-			SOCKBUF_UNLOCK(sb);
-
+			SOCK_SENDBUF_LOCK(so);
+			(void) hvsock_send_data(pcb->chan, NULL, 0,
+			    &so->so_snd);
+			SOCK_SENDBUF_UNLOCK(so);
 			soisdisconnecting(so);
 		}
+		/* FALLTHROUGH */
+	case SHUT_WR:
+		socantsendmore(so);
 	}
+	wakeup(&so->so_timeo);
 
 	return (0);
 }
diff --git a/sys/dev/hyperv/hvsock/hv_sock.h b/sys/dev/hyperv/hvsock/hv_sock.h
index e11621d76dbc..32a6e71640a4 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.h
+++ b/sys/dev/hyperv/hvsock/hv_sock.h
@@ -110,7 +110,7 @@ int	hvs_trans_soreceive(struct socket *, struct sockaddr **,
 int	hvs_trans_sosend(struct socket *, struct sockaddr *, struct uio *,
 	     struct mbuf *, struct mbuf *, int, struct thread *);
 int	hvs_trans_disconnect(struct socket *);
-int	hvs_trans_shutdown(struct socket *);
+int	hvs_trans_shutdown(struct socket *, enum shutdown_how);
 
 int	hvs_trans_lock(void);
 void	hvs_trans_unlock(void);
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
index 435b13842041..ab00bf2bc71f 100644
--- a/sys/kern/uipc_domain.c
+++ b/sys/kern/uipc_domain.c
@@ -151,7 +151,7 @@ pr_ready_notsupp(struct socket *so, struct mbuf *m, int count)
 }
 
 static int
-pr_shutdown_notsupp(struct socket *so)
+pr_shutdown_notsupp(struct socket *so, enum shutdown_how how)
 {
 	return (EOPNOTSUPP);
 }
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 919879e86e21..f61016d14e53 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -2966,59 +2966,18 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
 int
 soshutdown(struct socket *so, enum shutdown_how how)
 {
-	struct protosw *pr;
-	int error, soerror_enotconn;
-
-	soerror_enotconn = 0;
-	SOCK_LOCK(so);
-	if ((so->so_state &
-	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
-		/*
-		 * POSIX mandates us to return ENOTCONN when shutdown(2) is
-		 * invoked on a datagram sockets, however historically we would
-		 * actually tear socket down. This is known to be leveraged by
-		 * some applications to unblock process waiting in recvXXX(2)
-		 * by other process that it shares that socket with. Try to meet
-		 * both backward-compatibility and POSIX requirements by forcing
-		 * ENOTCONN but still asking protocol to perform pru_shutdown().
-		 */
-		if (so->so_type != SOCK_DGRAM && !SOLISTENING(so)) {
-			SOCK_UNLOCK(so);
-			return (ENOTCONN);
-		}
-		soerror_enotconn = 1;
-	}
-
-	if (SOLISTENING(so)) {
-		if (how != SHUT_WR) {
-			so->so_error = ECONNABORTED;
-			solisten_wakeup(so);	/* unlocks so */
-		} else {
-			SOCK_UNLOCK(so);
-		}
-		goto done;
-	}
-	SOCK_UNLOCK(so);
+	int error;
 
 	CURVNET_SET(so->so_vnet);
-	pr = so->so_proto;
-	if (pr->pr_flush != NULL)
-		pr->pr_flush(so, how);
-	if (how != SHUT_WR && !(pr->pr_flags & PR_SOCKBUF))
-		sorflush(so);
-	if (how != SHUT_RD) {
-		error = pr->pr_shutdown(so);
-		wakeup(&so->so_timeo);
-		CURVNET_RESTORE();
-		return ((error == 0 && soerror_enotconn) ? ENOTCONN : error);
-	}
-	wakeup(&so->so_timeo);
+	error = so->so_proto->pr_shutdown(so, how);
 	CURVNET_RESTORE();
 
-done:
-	return (soerror_enotconn ? ENOTCONN : 0);
+	return (error);
 }
 
+/*
+ * Used by several pr_shutdown implementations that use generic socket buffers.
+ */
 void
 sorflush(struct socket *so)
 {
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 8f5560e0f30b..0460d2761e7c 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1660,18 +1660,65 @@ uipc_sense(struct socket *so, struct stat *sb)
 }
 
 static int
-uipc_shutdown(struct socket *so)
+uipc_shutdown(struct socket *so, enum shutdown_how how)
 {
-	struct unpcb *unp;
+	struct unpcb *unp = sotounpcb(so);
+	int error;
 
-	unp = sotounpcb(so);
-	KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
+	SOCK_LOCK(so);
+	if ((so->so_state &
+	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
+		/*
+		 * POSIX mandates us to just return ENOTCONN when shutdown(2) is
+		 * invoked on a datagram socket, however historically we would
+		 * actually tear socket down.  This is known to be leveraged by
+		 * some applications to unblock process waiting in recv(2) by
+		 * other process that it shares that socket with.  Try to meet
+		 * both backward-compatibility and POSIX requirements by forcing
+		 * ENOTCONN but still flushing buffers and performing wakeup(9).
+		 *
+		 * XXXGL: it remains unknown what applications expect this
+		 * behavior and is this isolated to unix/dgram or inet/dgram or
+		 * both.  See: D10351, D3039.
+		 */
+		error = ENOTCONN;
+		if (so->so_type != SOCK_DGRAM) {
+			SOCK_UNLOCK(so);
+			return (error);
+		}
+	} else
+		error = 0;
+	if (SOLISTENING(so)) {
+		if (how != SHUT_WR) {
+			so->so_error = ECONNABORTED;
+			solisten_wakeup(so);    /* unlocks so */
+		} else
+			SOCK_UNLOCK(so);
+		return (0);
+	}
+	SOCK_UNLOCK(so);
 
-	UNP_PCB_LOCK(unp);
-	socantsendmore(so);
-	unp_shutdown(unp);
-	UNP_PCB_UNLOCK(unp);
-	return (0);
+	switch (how) {
+	case SHUT_RD:
+		/*
+		 * XXXGL: so far it is safe to call sorflush() on unix/dgram,
+		 * because PR_RIGHTS flag saves us from destructive sbrelease()
+		 * on our protocol specific buffers.
+		 */
+		sorflush(so);
+		break;
+	case SHUT_RDWR:
+		sorflush(so);
+		/* FALLTHROUGH */
+	case SHUT_WR:
+		UNP_PCB_LOCK(unp);
+		socantsendmore(so);
+		unp_shutdown(unp);
+		UNP_PCB_UNLOCK(unp);
+	}
+	wakeup(&so->so_timeo);
+
+	return (error);
 }
 
 static int
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index dea6a8f23cad..94d5e9e4bccc 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -450,10 +450,22 @@ rts_disconnect(struct socket *so)
 }
 
 static int
-rts_shutdown(struct socket *so)
+rts_shutdown(struct socket *so, enum shutdown_how how)
 {
+	/*
+	 * Note: route socket marks itself as connected through its lifetime.
+	 */
+	switch (how) {
+	case SHUT_RD:
+		sorflush(so);
+		break;
+	case SHUT_RDWR:
+		sorflush(so);
+		/* FALLTHROUGH */
+	case SHUT_WR:
+		socantsendmore(so);
+	}
 
-	socantsendmore(so);
 	return (0);
 }
 
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 4a61e685d898..a6bef1c7e275 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -982,16 +982,27 @@ rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 }
 
 static int
-rip_shutdown(struct socket *so)
+rip_shutdown(struct socket *so, enum shutdown_how how)
 {
-	struct inpcb *inp;
 
-	inp = sotoinpcb(so);
-	KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
+	SOCK_LOCK(so);
+	if (!(so->so_state & SS_ISCONNECTED)) {
+		SOCK_UNLOCK(so);
+		return (ENOTCONN);
+	}
+	SOCK_UNLOCK(so);
+
+	switch (how) {
+	case SHUT_RD:
+		sorflush(so);
+		break;
+	case SHUT_RDWR:
+		sorflush(so);
+		/* FALLTHROUGH */
+	case SHUT_WR:
+		socantsendmore(so);
+	}
 
-	INP_WLOCK(inp);
-	socantsendmore(so);
-	INP_WUNLOCK(inp);
 	return (0);
 }
 #endif /* INET */
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index 7fa4559108fd..ec9f211b519b 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -775,14 +775,39 @@ sctp_disconnect(struct socket *so)
 }
 
 int
-sctp_flush(struct socket *so, int how)
+sctp_shutdown(struct socket *so, enum shutdown_how how)
 {
+	struct sctp_inpcb *inp = (struct sctp_inpcb *)so->so_pcb;
 	struct epoch_tracker et;
 	struct sctp_tcb *stcb;
+	struct sctp_association *asoc;
+	struct sctp_nets *netp;
 	struct sctp_queued_to_read *control, *ncontrol;
-	struct sctp_inpcb *inp;
 	struct mbuf *m, *op_err;
 	bool need_to_abort = false;
+	int error = 0;
+
+	MPASS(inp);
+
+	if (!((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)))
+		return (EOPNOTSUPP);
+
+	SOCK_LOCK(so);
+	if ((so->so_state &
+	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
+		SOCK_UNLOCK(so);
+		return (ENOTCONN);
+	}
+	if (SOLISTENING(so)) {
+		if (how != SHUT_WR) {
+			so->so_error = ECONNABORTED;
+			solisten_wakeup(so);	/* unlocks so */
+		} else
+			SOCK_UNLOCK(so);
+		return (0);
+	}
+	SOCK_UNLOCK(so);
 
 	/*
 	 * For 1-to-1 style sockets, flush the read queue and trigger an
@@ -790,106 +815,70 @@ sctp_flush(struct socket *so, int how)
 	 * messages are lost. Loosing notifications does not need to be
 	 * signalled to the peer.
 	 */
-	if (how == PRU_FLUSH_WR) {
-		/* This function is only relevant for the read directions. */
-		return (0);
-	}
-	inp = (struct sctp_inpcb *)so->so_pcb;
-	if (inp == NULL) {
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
-		return (EINVAL);
-	}
-	SCTP_INP_WLOCK(inp);
-	if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
-		/* For 1-to-many style sockets this function does nothing. */
-		SCTP_INP_WUNLOCK(inp);
-		return (0);
-	}
-	stcb = LIST_FIRST(&inp->sctp_asoc_list);
-	if (stcb != NULL) {
-		SCTP_TCB_LOCK(stcb);
-	}
-	SCTP_INP_READ_LOCK(inp);
-	inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_CANT_READ;
-	SOCK_LOCK(so);
-	TAILQ_FOREACH_SAFE(control, &inp->read_queue, next, ncontrol) {
-		if ((control->spec_flags & M_NOTIFICATION) == 0) {
-			need_to_abort = true;
-		}
-		TAILQ_REMOVE(&inp->read_queue, control, next);
-		control->on_read_q = 0;
-		for (m = control->data; m; m = SCTP_BUF_NEXT(m)) {
-			sctp_sbfree(control, control->stcb, &so->so_rcv, m);
-		}
-		if (control->on_strm_q == 0) {
-			sctp_free_remote_addr(control->whoFrom);
-			if (control->data) {
-				sctp_m_freem(control->data);
-				control->data = NULL;
-			}
-			sctp_free_a_readq(stcb, control);
-		} else {
-			stcb->asoc.size_on_all_streams += control->length;
+	switch (how) {
+	case SHUT_RD:
+	case SHUT_RDWR:
+		SCTP_INP_WLOCK(inp);
+		stcb = LIST_FIRST(&inp->sctp_asoc_list);
+		if (stcb != NULL) {
+			SCTP_TCB_LOCK(stcb);
 		}
-	}
-	SOCK_UNLOCK(so);
-	SCTP_INP_READ_UNLOCK(inp);
-	if (need_to_abort && (stcb != NULL)) {
-		inp->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
-		SCTP_INP_WUNLOCK(inp);
-		op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
-		NET_EPOCH_ENTER(et);
-		sctp_abort_an_association(inp, stcb, op_err, false, SCTP_SO_LOCKED);
-		NET_EPOCH_EXIT(et);
-		return (ECONNABORTED);
-	}
-	if (stcb != NULL) {
-		SCTP_TCB_UNLOCK(stcb);
-	}
-	SCTP_INP_WUNLOCK(inp);
-	return (0);
-}
-
-int
-sctp_shutdown(struct socket *so)
-{
-	struct sctp_inpcb *inp;
+		SCTP_INP_READ_LOCK(inp);
+		inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_CANT_READ;
+		SOCK_LOCK(so);
+		TAILQ_FOREACH_SAFE(control, &inp->read_queue, next, ncontrol) {
+			if ((control->spec_flags & M_NOTIFICATION) == 0) {
+				need_to_abort = true;
+			}
+			TAILQ_REMOVE(&inp->read_queue, control, next);
+			control->on_read_q = 0;
+			for (m = control->data; m; m = SCTP_BUF_NEXT(m)) {
+				sctp_sbfree(control, control->stcb,
+				    &so->so_rcv, m);
+			}
+			if (control->on_strm_q == 0) {
+				sctp_free_remote_addr(control->whoFrom);
+				if (control->data) {
+					sctp_m_freem(control->data);
+					control->data = NULL;
+				}
+				sctp_free_a_readq(stcb, control);
+			} else {
+				stcb->asoc.size_on_all_streams +=
+				    control->length;
+			}
+		}
+		SOCK_UNLOCK(so);
+		SCTP_INP_READ_UNLOCK(inp);
+		if (need_to_abort && (stcb != NULL)) {
+			inp->last_abort_code = SCTP_FROM_SCTP_USRREQ +
+			    SCTP_LOC_6;
+			SCTP_INP_WUNLOCK(inp);
+			op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC,
+			    "");
+			NET_EPOCH_ENTER(et);
+			sctp_abort_an_association(inp, stcb, op_err, false,
+			    SCTP_SO_LOCKED);
+			NET_EPOCH_EXIT(et);
 
-	inp = (struct sctp_inpcb *)so->so_pcb;
-	if (inp == NULL) {
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
-		return (EINVAL);
-	}
-	SCTP_INP_RLOCK(inp);
-	/* For UDP model this is a invalid call */
-	if (!((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
-	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
-		/* Restore the flags that the soshutdown took away. */
-		SOCKBUF_LOCK(&so->so_rcv);
-		so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
-		SOCKBUF_UNLOCK(&so->so_rcv);
-		/* This proc will wakeup for read and do nothing (I hope) */
-		SCTP_INP_RUNLOCK(inp);
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
-		return (EOPNOTSUPP);
-	} else {
+			error = ECONNABORTED;
+			goto out;
+		}
+		if (stcb != NULL) {
+			SCTP_TCB_UNLOCK(stcb);
+		}
+		SCTP_INP_WUNLOCK(inp);
 		/*
-		 * Ok, if we reach here its the TCP model and it is either a
-		 * SHUT_WR or SHUT_RDWR. This means we put the shutdown flag
-		 * against it.
+		 * XXXGL: does SCTP need sorflush()? This is what old
+		 * soshutdown() used to do for all kinds of sockets.
 		 */
-		struct epoch_tracker et;
-		struct sctp_tcb *stcb;
-		struct sctp_association *asoc;
-		struct sctp_nets *netp;
+		sorflush(so);
+		if (how == SHUT_RD)
+			break;
+		/* FALLTHROUGH */
 
-		if ((so->so_state &
-		    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
-			SCTP_INP_RUNLOCK(inp);
-			return (ENOTCONN);
-		}
+	case SHUT_WR:
 		socantsendmore(so);
-
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		if (stcb == NULL) {
 			/*
@@ -898,14 +887,14 @@ sctp_shutdown(struct socket *so)
 			 * now.
 			 */
 			SCTP_INP_RUNLOCK(inp);
-			return (0);
+			goto out;
 		}
 		SCTP_TCB_LOCK(stcb);
 		asoc = &stcb->asoc;
 		if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_INP_RUNLOCK(inp);
-			return (0);
+			goto out;
 		}
 		if ((SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) &&
 		    (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_ECHOED) &&
@@ -916,7 +905,7 @@ sctp_shutdown(struct socket *so)
 			 */
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_INP_RUNLOCK(inp);
-			return (0);
+			goto out;
 		}
 		NET_EPOCH_ENTER(et);
 		if (stcb->asoc.alternate) {
@@ -961,7 +950,7 @@ sctp_shutdown(struct socket *so)
 				sctp_abort_an_association(stcb->sctp_ep, stcb,
 				    op_err, false, SCTP_SO_LOCKED);
 				NET_EPOCH_EXIT(et);
-				return (0);
+				goto out;
 			}
 		}
 		/*
@@ -972,8 +961,11 @@ sctp_shutdown(struct socket *so)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_INP_RUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
-		return (0);
 	}
+out:
+	wakeup(&so->so_timeo);
+
+	return (error);
 }
 
 /*
@@ -7523,7 +7515,6 @@ sctp_peeraddr(struct socket *so, struct sockaddr *sa)
 	.pr_close =	sctp_close,				\
 	.pr_detach =	sctp_close,				\
 	.pr_sopoll =	sopoll_generic,				\
-	.pr_flush =	sctp_flush,				\
 	.pr_disconnect = sctp_disconnect,			\
 	.pr_listen =	sctp_listen,				\
 	.pr_peeraddr =	sctp_peeraddr,				\
diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h
index 54566e9ac0df..9ec8bdd9ab5e 100644
--- a/sys/netinet/sctp_var.h
+++ b/sys/netinet/sctp_var.h
@@ -331,7 +331,7 @@ void
 sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
     uint8_t, uint8_t, uint16_t, uint32_t);
 int sctp_flush(struct socket *, int);
-int sctp_shutdown(struct socket *);
+int sctp_shutdown(struct socket *, enum shutdown_how);
 int
 sctp_bindx(struct socket *, int, struct sockaddr_storage *,
     int, int, struct proc *);
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index dad79374c08b..ccd6a6149dae 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -799,31 +799,57 @@ tcp6_usr_accept(struct socket *so, struct sockaddr *sa)
  * Mark the connection as being incapable of further output.
  */
 static int
-tcp_usr_shutdown(struct socket *so)
+tcp_usr_shutdown(struct socket *so, enum shutdown_how how)
 {
-	int error = 0;
-	struct inpcb *inp;
-	struct tcpcb *tp;
 	struct epoch_tracker et;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	int error = 0;
 
-	inp = sotoinpcb(so);
-	KASSERT(inp != NULL, ("inp == NULL"));
-	INP_WLOCK(inp);
-	if (inp->inp_flags & INP_DROPPED) {
-		INP_WUNLOCK(inp);
-		return (ECONNRESET);
+	SOCK_LOCK(so);
+	if ((so->so_state &
+	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
+		SOCK_UNLOCK(so);
+		return (ENOTCONN);
 	}
-	tp = intotcpcb(inp);
+	if (SOLISTENING(so)) {
+		if (how != SHUT_WR) {
+			so->so_error = ECONNABORTED;
+			solisten_wakeup(so);	/* unlocks so */
+		} else
+			SOCK_UNLOCK(so);
+		return (0);
+	}
+	SOCK_UNLOCK(so);
 
-	NET_EPOCH_ENTER(et);
-	socantsendmore(so);
-	tcp_usrclosed(tp);
-	if (!(inp->inp_flags & INP_DROPPED))
+	switch (how) {
+	case SHUT_RD:
+		sorflush(so);
+		break;
+	case SHUT_RDWR:
+		sorflush(so);
+		/* FALLTHROUGH */
+	case SHUT_WR:
+		/*
+		 * XXXGL: mimicking old soshutdown() here. But shouldn't we
+		 * return ECONNRESET for SHUT_RD as well?
+		 */
+		INP_WLOCK(inp);
+		if (inp->inp_flags & INP_DROPPED) {
+			INP_WUNLOCK(inp);
+			return (ECONNRESET);
+		}
+
+		socantsendmore(so);
+		NET_EPOCH_ENTER(et);
+		tcp_usrclosed(tp);
 		error = tcp_output_nodrop(tp);
-	tcp_bblog_pru(tp, PRU_SHUTDOWN, error);
-	TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
-	error = tcp_unlock_or_drop(tp, error);
-	NET_EPOCH_EXIT(et);
+		tcp_bblog_pru(tp, PRU_SHUTDOWN, error);
+		TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
+		error = tcp_unlock_or_drop(tp, error);
+		NET_EPOCH_EXIT(et);
+	}
+	wakeup(&so->so_timeo);
 
 	return (error);
 }
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index affdb3b1f4c7..f91a96edeb68 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -1670,16 +1670,42 @@ udp_disconnect(struct socket *so)
 #endif /* INET */
 
 int
-udp_shutdown(struct socket *so)
+udp_shutdown(struct socket *so, enum shutdown_how how)
 {
-	struct inpcb *inp;
+	int error;
 
-	inp = sotoinpcb(so);
-	KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
-	INP_WLOCK(inp);
-	socantsendmore(so);
-	INP_WUNLOCK(inp);
-	return (0);
+	SOCK_LOCK(so);
+	if (!(so->so_state & SS_ISCONNECTED))
+		/*
+		 * POSIX mandates us to just return ENOTCONN when shutdown(2) is
+		 * invoked on a datagram socket, however historically we would
+		 * actually tear socket down.  This is known to be leveraged by
+		 * some applications to unblock process waiting in recv(2) by
+		 * other process that it shares that socket with.  Try to meet
+		 * both backward-compatibility and POSIX requirements by forcing
+		 * ENOTCONN but still flushing buffers and performing wakeup(9).
+		 *
+		 * XXXGL: it remains unknown what applications expect this
+		 * behavior and is this isolated to unix/dgram or inet/dgram or
+		 * both.  See: D10351, D3039.
+		 */
+		error = ENOTCONN;
+	else
+		error = 0;
+	SOCK_UNLOCK(so);
+
+	switch (how) {
+	case SHUT_RD:
+		sorflush(so);
+		break;
+	case SHUT_RDWR:
+		sorflush(so);
+		/* FALLTHROUGH */
+	case SHUT_WR:
+		socantsendmore(so);
+	}
+
+	return (error);
 }
 
 #ifdef INET
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
index c31db2d97b5f..a66d76845eb6 100644
--- a/sys/netinet/udp_var.h
+++ b/sys/netinet/udp_var.h
@@ -168,7 +168,7 @@ udp_get_inpcbinfo(int protocol)
 int		udp_ctloutput(struct socket *, struct sockopt *);
 void		udplite_input(struct mbuf *, int);
 struct inpcb	*udp_notify(struct inpcb *inp, int errno);
-int		udp_shutdown(struct socket *so);
+int		udp_shutdown(struct socket *, enum shutdown_how);
 
 int		udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f,
 		    udp_tun_icmp_t i, void *ctx);
diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c
index 174cc29e6008..3264de331817 100644
--- a/sys/netinet6/raw_ip6.c
+++ b/sys/netinet6/raw_ip6.c
@@ -827,16 +827,27 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 }
 
 static int
-rip6_shutdown(struct socket *so)
+rip6_shutdown(struct socket *so, enum shutdown_how how)
 {
-	struct inpcb *inp;
 
-	inp = sotoinpcb(so);
-	KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL"));
+	SOCK_LOCK(so);
+	if (!(so->so_state & SS_ISCONNECTED)) {
+		SOCK_UNLOCK(so);
+		return (ENOTCONN);
+	}
+	SOCK_UNLOCK(so);
+
+	switch (how) {
+	case SHUT_RD:
+		sorflush(so);
+		break;
+	case SHUT_RDWR:
+		sorflush(so);
+		/* FALLTHROUGH */
+	case SHUT_WR:
+		socantsendmore(so);
+	}
 
-	INP_WLOCK(inp);
-	socantsendmore(so);
-	INP_WUNLOCK(inp);
 	return (0);
 }
 
diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c
index 1268e4990e90..e3ed37b53425 100644
--- a/sys/netinet6/sctp6_usrreq.c
+++ b/sys/netinet6/sctp6_usrreq.c
@@ -1095,7 +1095,6 @@ sctp6_getpeeraddr(struct socket *so, struct sockaddr *sa)
 	.pr_close =	sctp6_close,					\
 	.pr_detach =	sctp6_close,					\
 	.pr_sopoll =	sopoll_generic,					\
-	.pr_flush =	sctp_flush,					\
 	.pr_disconnect = sctp_disconnect,				\
 	.pr_listen =	sctp_listen,					\
 	.pr_peeraddr =	sctp6_getpeeraddr,				\
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
index b512c60971ee..6fd21b947687 100644
--- a/sys/sys/protosw.h
+++ b/sys/sys/protosw.h
@@ -39,6 +39,7 @@ struct thread;
 struct sockaddr;
 struct socket;
 struct sockopt;
+enum shutdown_how;
 
 /*#ifdef _KERNEL*/
 /*
@@ -84,8 +85,7 @@ typedef int	pr_send_t(struct socket *, int, struct mbuf *,
 		    struct sockaddr *, struct mbuf *, struct thread *);
 typedef int	pr_ready_t(struct socket *, struct mbuf *, int);
 typedef int	pr_sense_t(struct socket *, struct stat *);
-typedef int	pr_shutdown_t(struct socket *);
-typedef int	pr_flush_t(struct socket *, int);
+typedef int	pr_shutdown_t(struct socket *, enum shutdown_how);
 typedef int	pr_sockaddr_t(struct socket *, struct sockaddr *);
 typedef int	pr_sosend_t(struct socket *, struct sockaddr *, struct uio *,
 		    struct mbuf *, struct mbuf *, int, struct thread *);
@@ -137,7 +137,6 @@ struct protosw {
 	pr_peeraddr_t	*pr_peeraddr;	/* getpeername(2) */
 	pr_sockaddr_t	*pr_sockaddr;	/* getsockname(2) */
 	pr_sense_t	*pr_sense;	/* stat(2) */
-	pr_flush_t	*pr_flush;	/* XXXGL: merge with pr_shutdown_t! */
 	pr_sosetlabel_t	*pr_sosetlabel;	/* MAC, XXXGL: remove */
 	pr_setsbopt_t	*pr_setsbopt;	/* Socket buffer ioctls */
 };
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
index 9e78281e5dd2..3e24db552618 100644
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -633,14 +633,6 @@ enum shutdown_how {
 	SHUT_RDWR		/* shut down both sides */
 };
 
-#if __BSD_VISIBLE
-/* for SCTP */
-/* we cheat and use the SHUT_XX defines for these */
-#define PRU_FLUSH_RD     SHUT_RD
-#define PRU_FLUSH_WR     SHUT_WR
-#define PRU_FLUSH_RDWR   SHUT_RDWR
-#endif
-
 #if __BSD_VISIBLE
 /*
  * sendfile(2) header/trailer struct