git: 2de2ae331be2 - main - sctp: improve sctp_pathmtu_adjustment()

From: Michael Tuexen <tuexen_at_FreeBSD.org>
Date: Thu, 30 Dec 2021 14:19:35 UTC
The branch main has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=2de2ae331be2504d1038b2124e7d44f23aa70405

commit 2de2ae331be2504d1038b2124e7d44f23aa70405
Author:     Michael Tuexen <tuexen@FreeBSD.org>
AuthorDate: 2021-12-30 14:16:05 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2021-12-30 14:16:05 +0000

    sctp: improve sctp_pathmtu_adjustment()
    
    Allow the resending of DATA chunks to be controlled by the caller,
    which allows retiring sctp_mtu_size_reset() in a separate commit.
    Also improve the computation of the overhead and use 32-bit integers
    consistently.
    Thanks to Timo Voelker for pointing me to the code.
    
    MFC after:      3 days
---
 sys/netinet/sctp_input.c    |  6 ++---
 sys/netinet/sctp_pcb.c      |  2 +-
 sys/netinet/sctp_usrreq.c   | 62 ++++++++++++++++++++++++++++-----------------
 sys/netinet/sctp_var.h      |  2 +-
 sys/netinet6/sctp6_usrreq.c |  2 +-
 5 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c
index bdb126cbb50f..ef1b44c4a3ff 100644
--- a/sys/netinet/sctp_input.c
+++ b/sys/netinet/sctp_input.c
@@ -5291,7 +5291,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
 					/* UDP encapsulation turned on. */
 					net->mtu -= sizeof(struct udphdr);
 					if (stcb->asoc.smallest_mtu > net->mtu) {
-						sctp_pathmtu_adjustment(stcb, net->mtu);
+						sctp_pathmtu_adjustment(stcb, net->mtu, true);
 					}
 				} else if (port == 0) {
 					/* UDP encapsulation turned off. */
@@ -5331,7 +5331,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
 			/* UDP encapsulation turned on. */
 			net->mtu -= sizeof(struct udphdr);
 			if (stcb->asoc.smallest_mtu > net->mtu) {
-				sctp_pathmtu_adjustment(stcb, net->mtu);
+				sctp_pathmtu_adjustment(stcb, net->mtu, true);
 			}
 		} else if (port == 0) {
 			/* UDP encapsulation turned off. */
@@ -5426,7 +5426,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
 					/* UDP encapsulation turned on. */
 					net->mtu -= sizeof(struct udphdr);
 					if (stcb->asoc.smallest_mtu > net->mtu) {
-						sctp_pathmtu_adjustment(stcb, net->mtu);
+						sctp_pathmtu_adjustment(stcb, net->mtu, true);
 					}
 				} else if (port == 0) {
 					/* UDP encapsulation turned off. */
diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c
index 7ad651ec377f..b7fce09fc661 100644
--- a/sys/netinet/sctp_pcb.c
+++ b/sys/netinet/sctp_pcb.c
@@ -4025,7 +4025,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
 		stcb->asoc.smallest_mtu = net->mtu;
 	}
 	if (stcb->asoc.smallest_mtu > net->mtu) {
-		sctp_pathmtu_adjustment(stcb, net->mtu);
+		sctp_pathmtu_adjustment(stcb, net->mtu, true);
 	}
 #ifdef INET6
 	if (newaddr->sa_family == AF_INET6) {
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index bb84d3b7083f..e30d02dc9de1 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -104,35 +104,51 @@ VNET_SYSUNINIT(sctp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, sctp_finish, NULL);
 #endif
 
 void
-sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint16_t nxtsz)
+sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint32_t mtu, bool resend)
 {
+	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
-	uint16_t overhead;
-
-	/* Adjust that too */
-	stcb->asoc.smallest_mtu = nxtsz;
-	/* now off to subtract IP_DF flag if needed */
-	overhead = IP_HDR_SIZE + sizeof(struct sctphdr);
-	if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
-		overhead += sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+	uint32_t overhead;
+
+	asoc = &stcb->asoc;
+	KASSERT(mtu < asoc->smallest_mtu,
+	    ("Currently only reducing association MTU %u supported (MTU %u)",
+	    asoc->smallest_mtu, mtu));
+	asoc->smallest_mtu = mtu;
+	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+		overhead = SCTP_MIN_OVERHEAD;
+	} else {
+		overhead = SCTP_MIN_V4_OVERHEAD;
+	}
+	if (asoc->idata_supported) {
+		if (sctp_auth_is_required_chunk(SCTP_IDATA, asoc->peer_auth_chunks)) {
+			overhead += sctp_get_auth_chunk_len(asoc->peer_hmac_id);
+		}
+	} else {
+		if (sctp_auth_is_required_chunk(SCTP_DATA, asoc->peer_auth_chunks)) {
+			overhead += sctp_get_auth_chunk_len(asoc->peer_hmac_id);
+		}
 	}
-	TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
-		if ((chk->send_size + overhead) > nxtsz) {
+	KASSERT(overhead % 4 == 0,
+	    ("overhead (%u) not a multiple of 4", overhead));
+	TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+		if (((uint32_t)chk->send_size + overhead) > mtu) {
 			chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
 		}
 	}
-	TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
-		if ((chk->send_size + overhead) > nxtsz) {
-			/*
-			 * For this guy we also mark for immediate resend
-			 * since we sent to big of chunk
-			 */
+	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+		if (((uint32_t)chk->send_size + overhead) > mtu) {
 			chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
-			if (chk->sent < SCTP_DATAGRAM_RESEND) {
+			if (resend && chk->sent < SCTP_DATAGRAM_RESEND) {
+				/*
+				 * If requested, mark the chunk for
+				 * immediate resend, since we sent it being
+				 * too big.
+				 */
 				sctp_flight_size_decrease(chk);
 				sctp_total_flight_decrease(stcb, chk);
 				chk->sent = SCTP_DATAGRAM_RESEND;
-				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+				sctp_ucount_incr(asoc->sent_queue_retran_cnt);
 				chk->rec.data.doing_fast_retransmit = 0;
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
 					sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
@@ -141,7 +157,7 @@ sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint16_t nxtsz)
 					    (uint32_t)(uintptr_t)chk->whoTo,
 					    chk->rec.data.tsn);
 				}
-				/* Clear any time so NO RTT is being done */
+				/* Clear any time, so NO RTT is being done. */
 				if (chk->do_rtt == 1) {
 					chk->do_rtt = 0;
 					chk->whoTo->rto_needed = 1;
@@ -229,7 +245,7 @@ sctp_notify(struct sctp_inpcb *inp,
 		}
 		/* Update the association MTU */
 		if (stcb->asoc.smallest_mtu > next_mtu) {
-			sctp_pathmtu_adjustment(stcb, next_mtu);
+			sctp_pathmtu_adjustment(stcb, next_mtu, true);
 		}
 		/* Finally, start the PMTU timer if it was running before. */
 		if (timer_stopped) {
@@ -5363,7 +5379,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
 								break;
 							}
 							if (net->mtu < stcb->asoc.smallest_mtu) {
-								sctp_pathmtu_adjustment(stcb, net->mtu);
+								sctp_pathmtu_adjustment(stcb, net->mtu, true);
 							}
 						}
 					}
@@ -5507,7 +5523,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
 									break;
 								}
 								if (net->mtu < stcb->asoc.smallest_mtu) {
-									sctp_pathmtu_adjustment(stcb, net->mtu);
+									sctp_pathmtu_adjustment(stcb, net->mtu, true);
 								}
 							}
 						}
diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h
index ed01de7d7014..a18dbb5e66e2 100644
--- a/sys/netinet/sctp_var.h
+++ b/sys/netinet/sctp_var.h
@@ -328,7 +328,7 @@ int sctp_ctloutput(struct socket *, struct sockopt *);
 void sctp_input_with_port(struct mbuf *, int, uint16_t);
 int sctp_input(struct mbuf **, int *, int);
 #endif
-void sctp_pathmtu_adjustment(struct sctp_tcb *, uint16_t);
+void sctp_pathmtu_adjustment(struct sctp_tcb *, uint32_t, bool);
 void sctp_drain(void);
 void sctp_init(void);
 void
diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c
index 516107caf408..361bc4a18a2f 100644
--- a/sys/netinet6/sctp6_usrreq.c
+++ b/sys/netinet6/sctp6_usrreq.c
@@ -233,7 +233,7 @@ sctp6_notify(struct sctp_inpcb *inp,
 		}
 		/* Update the association MTU */
 		if (stcb->asoc.smallest_mtu > next_mtu) {
-			sctp_pathmtu_adjustment(stcb, next_mtu);
+			sctp_pathmtu_adjustment(stcb, next_mtu, true);
 		}
 		/* Finally, start the PMTU timer if it was running before. */
 		if (timer_stopped) {