git: f567d55f5152 - main - inpcb: don't return INP_DROPPED entries from pcb lookups

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Tue, 08 Nov 2022 18:24:54 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=f567d55f51527e90ca773c6191ac8266e25eef98

commit f567d55f51527e90ca773c6191ac8266e25eef98
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-11-08 18:24:39 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-11-08 18:24:39 +0000

    inpcb: don't return INP_DROPPED entries from pcb lookups
    
    The in_pcbdrop() KPI, which is used solely by TCP, allows to remove a
    pcb from hash list and mark it as dropped.  The comment suggests that
    such pcb won't be returned by lookups.  Indeed, every call to
    in_pcblookup*() is accompanied by a check for INP_DROPPED.  Do what
    comment suggests: never return such pcbs and remove unnecessary checks.
    
    Reviewed by:            tuexen
    Differential revision:  https://reviews.freebsd.org/D37061
---
 sys/netinet/in_pcb.c    | 22 +++++++++++++++++-----
 sys/netinet/tcp_input.c | 11 +----------
 sys/netinet/tcp_lro.c   |  4 +---
 sys/netinet/tcp_subr.c  | 12 ++++--------
 4 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index ea8bbea1b5ff..70aaca21a20f 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -1536,15 +1536,15 @@ in_pcbrele(struct inpcb *inp, const inp_lookup_t lock)
 	    in_pcbrele_rlocked(inp) : in_pcbrele_wlocked(inp));
 }
 
-bool
-inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock)
+static inline bool
+_inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock, const int ignflags)
 {
 
 	MPASS(lock == INPLOOKUP_RLOCKPCB || lock == INPLOOKUP_WLOCKPCB);
 	SMR_ASSERT_ENTERED(inp->inp_pcbinfo->ipi_smr);
 
 	if (__predict_true(inp_trylock(inp, lock))) {
-		if (__predict_false(inp->inp_flags & INP_FREED)) {
+		if (__predict_false(inp->inp_flags & ignflags)) {
 			smr_exit(inp->inp_pcbinfo->ipi_smr);
 			inp_unlock(inp, lock);
 			return (false);
@@ -1564,7 +1564,7 @@ inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock)
 		 * through in_pcbfree() and has another reference, that
 		 * prevented its release by our in_pcbrele().
 		 */
-		if (__predict_false(inp->inp_flags & INP_FREED)) {
+		if (__predict_false(inp->inp_flags & ignflags)) {
 			inp_unlock(inp, lock);
 			return (false);
 		}
@@ -1575,6 +1575,18 @@ inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock)
 	}
 }
 
+bool
+inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock)
+{
+
+	/*
+	 * in_pcblookup() family of functions ignore not only freed entries,
+	 * that may be found due to lockless access to the hash, but dropped
+	 * entries, too.
+	 */
+	return (_inp_smr_lock(inp, lock, INP_FREED | INP_DROPPED));
+}
+
 /*
  * inp_next() - inpcb hash/list traversal iterator
  *
@@ -1631,7 +1643,7 @@ inp_next(struct inpcb_iterator *ii)
 		    inp = II_LIST_NEXT(inp, hash)) {
 			if (match != NULL && (match)(inp, ctx) == false)
 				continue;
-			if (__predict_true(inp_smr_lock(inp, lock)))
+			if (__predict_true(_inp_smr_lock(inp, lock, INP_FREED)))
 				break;
 			else {
 				smr_enter(ipi->ipi_smr);
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 370b947767ff..eeed49681ec6 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -937,16 +937,7 @@ findpcb:
 		goto dropwithreset;
 	}
 	INP_LOCK_ASSERT(inp);
-	/*
-	 * While waiting for inp lock during the lookup, another thread
-	 * can have dropped the inpcb, in which case we need to loop back
-	 * and try to find a new inpcb to deliver to.
-	 */
-	if (inp->inp_flags & INP_DROPPED) {
-		INP_UNLOCK(inp);
-		inp = NULL;
-		goto findpcb;
-	}
+
 	if ((inp->inp_flowtype == M_HASHTYPE_NONE) &&
 	    (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) &&
 	    ((inp->inp_socket == NULL) || !SOLISTENING(inp->inp_socket))) {
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 4f7457d875e7..b0b9a812b3df 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -1360,9 +1360,7 @@ tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
 	tp = intotcpcb(inp);
 
 	/* Check if the inp is dead, Jim. */
-	if (tp == NULL ||
-	    (inp->inp_flags & INP_DROPPED) ||
-	    (tp->t_state == TCPS_TIME_WAIT)) {
+	if (tp->t_state == TCPS_TIME_WAIT) {
 		INP_WUNLOCK(inp);
 		return (TCP_LRO_CANNOT);
 	}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index b78967a0f20c..2363cdf75e1e 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2878,8 +2878,7 @@ tcp_ctlinput_with_port(struct icmp *icp, uint16_t port)
 	inp = in_pcblookup(&V_tcbinfo, ip->ip_dst, th->th_dport, ip->ip_src,
 	    th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL)  {
-		if (!(inp->inp_flags & INP_DROPPED) &&
-		    !(inp->inp_socket == NULL)) {
+		if (inp->inp_socket != NULL) {
 			tp = intotcpcb(inp);
 #ifdef TCP_OFFLOAD
 			if (tp->t_flags & TF_TOE && errno == EMSGSIZE) {
@@ -3071,8 +3070,7 @@ tcp6_ctlinput_with_port(struct ip6ctlparam *ip6cp, uint16_t port)
 	}
 	m_copydata(m, off, sizeof(tcp_seq), (caddr_t)&icmp_tcp_seq);
 	if (inp != NULL)  {
-		if (!(inp->inp_flags & INP_DROPPED) &&
-		    !(inp->inp_socket == NULL)) {
+		if (inp->inp_socket != NULL) {
 			tp = intotcpcb(inp);
 #ifdef TCP_OFFLOAD
 			if (tp->t_flags & TF_TOE && errno == EMSGSIZE) {
@@ -3697,8 +3695,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
 #endif
 	}
 	if (inp != NULL) {
-		if ((inp->inp_flags & INP_DROPPED) == 0 &&
-		    !SOLISTENING(inp->inp_socket)) {
+		if (!SOLISTENING(inp->inp_socket)) {
 			tp = intotcpcb(inp);
 			tp = tcp_drop(tp, ECONNABORTED);
 			if (tp != NULL)
@@ -3817,8 +3814,7 @@ sysctl_switch_tls(SYSCTL_HANDLER_ARGS)
 	}
 	NET_EPOCH_EXIT(et);
 	if (inp != NULL) {
-		if ((inp->inp_flags & INP_DROPPED) != 0 ||
-		    inp->inp_socket == NULL) {
+		if (inp->inp_socket == NULL) {
 			error = ECONNRESET;
 			INP_WUNLOCK(inp);
 		} else {