git: fe8c78f0d202 - main - ktls: Add full support for TLS RX offloading via network interface.

From: Hans Petter Selasky <hselasky_at_FreeBSD.org>
Date: Tue, 07 Jun 2022 10:59:31 UTC
The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=fe8c78f0d20285cacb0695aa96a4ae302429429a

commit fe8c78f0d20285cacb0695aa96a4ae302429429a
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-04-23 08:11:18 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-06-07 10:58:09 +0000

    ktls: Add full support for TLS RX offloading via network interface.
    
    Basic TLS RX offloading uses the "csum_flags" field in the mbuf packet
    header to figure out if an incoming mbuf has been fully offloaded or
    not. This information follows the packet stream via the LRO engine, IP
    stack and finally to the TCP stack. The TCP stack preserves the mbuf
    packet header also when re-assembling packets after packet loss. When
    the mbuf goes into the socket buffer the packet header is demoted and
    the offload information is transferred to "m_flags". Later on a
    worker thread will analyze the mbuf flags and decide if the mbufs
    making up a TLS record indicate a fully-, partially- or not decrypted
    TLS record. Based on these three cases the worker thread will either
    pass the packet on as-is or recrypt the decrypted bits, if any, or
    decrypt the packet as usual.
    
    During packet loss the kernel TLS code will call back into the network
    driver using the send tag, informing about the TCP starting sequence
    number of every TLS record that is not fully decrypted by the network
    interface. The network interface then stores this information in a
    compressed table and starts asking the hardware if it has found a
    valid TLS header in the TCP data payload. If the hardware has found a
    valid TLS header and the referred TLS header is at a valid TCP
    sequence number according to the TCP sequence numbers provided by the
    kernel TLS code, the network driver then informs the hardware that it
    can resume decryption.
    
    Care has been taken to not merge encrypted and decrypted mbuf chains,
    in the LRO engine and when appending mbufs to the socket buffer.
    
    The mbuf's leaf network interface pointer is used to figure out from
    which network interface the offloading rule should be allocated. Also
    this pointer is used to track route changes.
    
    Currently mbuf send tags are used in both the transmit and receive
    directions, for convenience, but may get a new name in the future to
    better reflect their usage.
    
    Reviewed by:    jhb@ and gallatin@
    Differential revision:  https://reviews.freebsd.org/D32356
    Sponsored by:   NVIDIA Networking
---
 sys/kern/uipc_ktls.c    | 425 ++++++++++++++++++++++++++++++++++++++++++++----
 sys/kern/uipc_sockbuf.c |  39 ++++-
 sys/sys/ktls.h          |  15 +-
 sys/sys/sockbuf.h       |   1 +
 4 files changed, 443 insertions(+), 37 deletions(-)

diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
index e55f4fd0cb62..5855e62bd983 100644
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -299,6 +299,7 @@ static MALLOC_DEFINE(M_KTLS, "ktls", "Kernel TLS");
 
 static void ktls_cleanup(struct ktls_session *tls);
 #if defined(INET) || defined(INET6)
+static void ktls_reset_receive_tag(void *context, int pending);
 static void ktls_reset_send_tag(void *context, int pending);
 #endif
 static void ktls_work_thread(void *ctx);
@@ -503,7 +504,7 @@ start:
 #if defined(INET) || defined(INET6)
 static int
 ktls_create_session(struct socket *so, struct tls_enable *en,
-    struct ktls_session **tlsp)
+    struct ktls_session **tlsp, int direction)
 {
 	struct ktls_session *tls;
 	int error;
@@ -619,7 +620,10 @@ ktls_create_session(struct socket *so, struct tls_enable *en,
 	counter_u64_add(ktls_offload_active, 1);
 
 	refcount_init(&tls->refcount, 1);
-	TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls);
+	if (direction == KTLS_RX)
+		TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_receive_tag, tls);
+	else
+		TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls);
 
 	tls->wq_index = ktls_get_cpu(so);
 
@@ -745,7 +749,7 @@ out:
 }
 
 static struct ktls_session *
-ktls_clone_session(struct ktls_session *tls)
+ktls_clone_session(struct ktls_session *tls, int direction)
 {
 	struct ktls_session *tls_new;
 
@@ -754,7 +758,12 @@ ktls_clone_session(struct ktls_session *tls)
 	counter_u64_add(ktls_offload_active, 1);
 
 	refcount_init(&tls_new->refcount, 1);
-	TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_tag, tls_new);
+	if (direction == KTLS_RX)
+		TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_receive_tag,
+		    tls_new);
+	else
+		TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_tag,
+		    tls_new);
 
 	/* Copy fields from existing session. */
 	tls_new->params = tls->params;
@@ -810,6 +819,8 @@ ktls_cleanup(struct ktls_session *tls)
 		}
 		if (tls->snd_tag != NULL)
 			m_snd_tag_rele(tls->snd_tag);
+		if (tls->rx_ifp != NULL)
+			if_rele(tls->rx_ifp);
 		break;
 #ifdef TCP_OFFLOAD
 	case TCP_TLS_MODE_TOE:
@@ -983,28 +994,135 @@ out:
 	return (error);
 }
 
+/*
+ * Allocate an initial TLS receive tag for doing HW decryption of TLS
+ * data.
+ *
+ * This function allocates a new TLS receive tag on whatever interface
+ * the connection is currently routed over.  If the connection ends up
+ * using a different interface for receive this will get fixed up via
+ * ktls_input_ifp_mismatch as future packets arrive.
+ */
 static int
-ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force)
+ktls_alloc_rcv_tag(struct inpcb *inp, struct ktls_session *tls,
+    struct m_snd_tag **mstp)
 {
-	struct m_snd_tag *mst;
+	union if_snd_tag_alloc_params params;
+	struct ifnet *ifp;
+	struct nhop_object *nh;
 	int error;
 
-	error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst);
-	if (error == 0) {
-		tls->mode = TCP_TLS_MODE_IFNET;
-		tls->snd_tag = mst;
-		switch (tls->params.cipher_algorithm) {
-		case CRYPTO_AES_CBC:
-			counter_u64_add(ktls_ifnet_cbc, 1);
-			break;
-		case CRYPTO_AES_NIST_GCM_16:
-			counter_u64_add(ktls_ifnet_gcm, 1);
-			break;
-		case CRYPTO_CHACHA20_POLY1305:
-			counter_u64_add(ktls_ifnet_chacha20, 1);
-			break;
+	if (!ktls_ocf_recrypt_supported(tls))
+		return (ENXIO);
+
+	INP_RLOCK(inp);
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		INP_RUNLOCK(inp);
+		return (ECONNRESET);
+	}
+	if (inp->inp_socket == NULL) {
+		INP_RUNLOCK(inp);
+		return (ECONNRESET);
+	}
+
+	/*
+	 * Check administrative controls on ifnet TLS to determine if
+	 * ifnet TLS should be denied.
+	 */
+	if (ktls_ifnet_permitted == 0) {
+		INP_RUNLOCK(inp);
+		return (ENXIO);
+	}
+
+	/*
+	 * XXX: As with ktls_alloc_snd_tag, use the cached route in
+	 * the inpcb to find the interface.
+	 */
+	nh = inp->inp_route.ro_nh;
+	if (nh == NULL) {
+		INP_RUNLOCK(inp);
+		return (ENXIO);
+	}
+	ifp = nh->nh_ifp;
+	if_ref(ifp);
+	tls->rx_ifp = ifp;
+
+	params.hdr.type = IF_SND_TAG_TYPE_TLS_RX;
+	params.hdr.flowid = inp->inp_flowid;
+	params.hdr.flowtype = inp->inp_flowtype;
+	params.hdr.numa_domain = inp->inp_numa_domain;
+	params.tls_rx.inp = inp;
+	params.tls_rx.tls = tls;
+	params.tls_rx.vlan_id = 0;
+
+	INP_RUNLOCK(inp);
+
+	if (inp->inp_vflag & INP_IPV6) {
+		if ((ifp->if_capenable2 & IFCAP2_RXTLS6) == 0) {
+			error = EOPNOTSUPP;
+			goto out;
 		}
+	} else {
+		if ((ifp->if_capenable2 & IFCAP2_RXTLS4) == 0) {
+			error = EOPNOTSUPP;
+			goto out;
+		}
+	}
+	error = m_snd_tag_alloc(ifp, &params, mstp);
+
+	/*
+	 * If this connection is over a vlan, vlan_snd_tag_alloc
+	 * rewrites vlan_id with the saved interface.  Save the VLAN
+	 * ID for use in ktls_reset_receive_tag which allocates new
+	 * receive tags directly from the leaf interface bypassing
+	 * if_vlan.
+	 */
+	if (error == 0)
+		tls->rx_vlan_id = params.tls_rx.vlan_id;
+out:
+	return (error);
+}
+
+static int
+ktls_try_ifnet(struct socket *so, struct ktls_session *tls, int direction,
+    bool force)
+{
+	struct m_snd_tag *mst;
+	int error;
+
+	switch (direction) {
+	case KTLS_TX:
+		error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst);
+		if (__predict_false(error != 0))
+			goto done;
+		break;
+	case KTLS_RX:
+		KASSERT(!force, ("%s: forced receive tag", __func__));
+		error = ktls_alloc_rcv_tag(so->so_pcb, tls, &mst);
+		if (__predict_false(error != 0))
+			goto done;
+		break;
+	default:
+		__assert_unreachable();
+	}
+
+	tls->mode = TCP_TLS_MODE_IFNET;
+	tls->snd_tag = mst;
+
+	switch (tls->params.cipher_algorithm) {
+	case CRYPTO_AES_CBC:
+		counter_u64_add(ktls_ifnet_cbc, 1);
+		break;
+	case CRYPTO_AES_NIST_GCM_16:
+		counter_u64_add(ktls_ifnet_gcm, 1);
+		break;
+	case CRYPTO_CHACHA20_POLY1305:
+		counter_u64_add(ktls_ifnet_chacha20, 1);
+		break;
+	default:
+		break;
 	}
+done:
 	return (error);
 }
 
@@ -1185,7 +1303,7 @@ ktls_enable_rx(struct socket *so, struct tls_enable *en)
 	if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable)
 		return (ENOTSUP);
 
-	error = ktls_create_session(so, en, &tls);
+	error = ktls_create_session(so, en, &tls, KTLS_RX);
 	if (error)
 		return (error);
 
@@ -1206,10 +1324,13 @@ ktls_enable_rx(struct socket *so, struct tls_enable *en)
 	ktls_check_rx(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
+	/* Prefer TOE -> ifnet TLS -> software TLS. */
 #ifdef TCP_OFFLOAD
 	error = ktls_try_toe(so, tls, KTLS_RX);
 	if (error)
 #endif
+		error = ktls_try_ifnet(so, tls, KTLS_RX, false);
+	if (error)
 		ktls_use_sw(tls);
 
 	counter_u64_add(ktls_offload_total, 1);
@@ -1252,7 +1373,7 @@ ktls_enable_tx(struct socket *so, struct tls_enable *en)
 	if (mb_use_ext_pgs == 0)
 		return (ENXIO);
 
-	error = ktls_create_session(so, en, &tls);
+	error = ktls_create_session(so, en, &tls, KTLS_TX);
 	if (error)
 		return (error);
 
@@ -1261,7 +1382,7 @@ ktls_enable_tx(struct socket *so, struct tls_enable *en)
 	error = ktls_try_toe(so, tls, KTLS_TX);
 	if (error)
 #endif
-		error = ktls_try_ifnet(so, tls, false);
+		error = ktls_try_ifnet(so, tls, KTLS_TX, false);
 	if (error)
 		error = ktls_try_sw(so, tls, KTLS_TX);
 
@@ -1418,10 +1539,10 @@ ktls_set_tx_mode(struct socket *so, int mode)
 	SOCKBUF_UNLOCK(&so->so_snd);
 	INP_WUNLOCK(inp);
 
-	tls_new = ktls_clone_session(tls);
+	tls_new = ktls_clone_session(tls, KTLS_TX);
 
 	if (mode == TCP_TLS_MODE_IFNET)
-		error = ktls_try_ifnet(so, tls_new, true);
+		error = ktls_try_ifnet(so, tls_new, KTLS_TX, true);
 	else
 		error = ktls_try_sw(so, tls_new, KTLS_TX);
 	if (error) {
@@ -1479,6 +1600,89 @@ ktls_set_tx_mode(struct socket *so, int mode)
 	return (0);
 }
 
+/*
+ * Try to allocate a new TLS receive tag.  This task is scheduled when
+ * sbappend_ktls_rx detects an input path change.  If a new tag is
+ * allocated, replace the tag in the TLS session.  If a new tag cannot
+ * be allocated, let the session fall back to software decryption.
+ */
+static void
+ktls_reset_receive_tag(void *context, int pending)
+{
+	union if_snd_tag_alloc_params params;
+	struct ktls_session *tls;
+	struct m_snd_tag *mst;
+	struct inpcb *inp;
+	struct ifnet *ifp;
+	struct socket *so;
+	int error;
+
+	MPASS(pending == 1);
+
+	tls = context;
+	so = tls->so;
+	inp = so->so_pcb;
+	ifp = NULL;
+
+	INP_RLOCK(inp);
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		INP_RUNLOCK(inp);
+		goto out;
+	}
+
+	SOCKBUF_LOCK(&so->so_rcv);
+	m_snd_tag_rele(tls->snd_tag);
+	tls->snd_tag = NULL;
+
+	ifp = tls->rx_ifp;
+	if_ref(ifp);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+
+	params.hdr.type = IF_SND_TAG_TYPE_TLS_RX;
+	params.hdr.flowid = inp->inp_flowid;
+	params.hdr.flowtype = inp->inp_flowtype;
+	params.hdr.numa_domain = inp->inp_numa_domain;
+	params.tls_rx.inp = inp;
+	params.tls_rx.tls = tls;
+	params.tls_rx.vlan_id = tls->rx_vlan_id;
+	INP_RUNLOCK(inp);
+
+	if (inp->inp_vflag & INP_IPV6) {
+		if ((ifp->if_capenable2 & IFCAP2_RXTLS6) == 0)
+			goto out;
+	} else {
+		if ((ifp->if_capenable2 & IFCAP2_RXTLS4) == 0)
+			goto out;
+	}
+
+	error = m_snd_tag_alloc(ifp, &params, &mst);
+	if (error == 0) {
+		SOCKBUF_LOCK(&so->so_rcv);
+		tls->snd_tag = mst;
+		SOCKBUF_UNLOCK(&so->so_rcv);
+
+		counter_u64_add(ktls_ifnet_reset, 1);
+	} else {
+		/*
+		 * Just fall back to software decryption if a tag
+		 * cannot be allocated leaving the connection intact.
+		 * If a future input path change switches to another
+		 * interface this connection will resume ifnet TLS.
+		 */
+		counter_u64_add(ktls_ifnet_reset_failed, 1);
+	}
+
+out:
+	mtx_pool_lock(mtxpool_sleep, tls);
+	tls->reset_pending = false;
+	mtx_pool_unlock(mtxpool_sleep, tls);
+
+	if (ifp != NULL)
+		if_rele(ifp);
+	sorele(so);
+	ktls_free(tls);
+}
+
 /*
  * Try to allocate a new TLS send tag.  This task is scheduled when
  * ip_output detects a route change while trying to transmit a packet
@@ -1565,6 +1769,37 @@ ktls_reset_send_tag(void *context, int pending)
 	ktls_free(tls);
 }
 
+void
+ktls_input_ifp_mismatch(struct sockbuf *sb, struct ifnet *ifp)
+{
+	struct ktls_session *tls;
+	struct socket *so;
+
+	SOCKBUF_LOCK_ASSERT(sb);
+	KASSERT(sb->sb_flags & SB_TLS_RX, ("%s: sockbuf %p isn't TLS RX",
+	    __func__, sb));
+	so = __containerof(sb, struct socket, so_rcv);
+
+	tls = sb->sb_tls_info;
+	if_rele(tls->rx_ifp);
+	if_ref(ifp);
+	tls->rx_ifp = ifp;
+
+	/*
+	 * See if we should schedule a task to update the receive tag for
+	 * this session.
+	 */
+	mtx_pool_lock(mtxpool_sleep, tls);
+	if (!tls->reset_pending) {
+		(void) ktls_hold(tls);
+		soref(so);
+		tls->so = so;
+		tls->reset_pending = true;
+		taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task);
+	}
+	mtx_pool_unlock(mtxpool_sleep, tls);
+}
+
 int
 ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls)
 {
@@ -1908,7 +2143,7 @@ ktls_detach_record(struct sockbuf *sb, int len)
 			return (NULL);
 		}
 	}
-	n->m_flags |= M_NOTREADY;
+	n->m_flags |= (m->m_flags & (M_NOTREADY | M_DECRYPTED));
 
 	/* Store remainder in 'n'. */
 	n->m_len = m->m_len - remain;
@@ -1993,6 +2228,86 @@ tls13_find_record_type(struct ktls_session *tls, struct mbuf *m, int tls_len,
 	return (0);
 }
 
+/*
+ * Check if a mbuf chain is fully decrypted at the given offset and
+ * length. Returns KTLS_MBUF_CRYPTO_ST_DECRYPTED if all data is
+ * decrypted. KTLS_MBUF_CRYPTO_ST_MIXED if there is a mix of encrypted
+ * and decrypted data. Else KTLS_MBUF_CRYPTO_ST_ENCRYPTED if all data
+ * is encrypted.
+ */
+ktls_mbuf_crypto_st_t
+ktls_mbuf_crypto_state(struct mbuf *mb, int offset, int len)
+{
+	int m_flags_ored = 0;
+	int m_flags_anded = -1;
+
+	for (; mb != NULL; mb = mb->m_next) {
+		if (offset < mb->m_len)
+			break;
+		offset -= mb->m_len;
+	}
+	offset += len;
+
+	for (; mb != NULL; mb = mb->m_next) {
+		m_flags_ored |= mb->m_flags;
+		m_flags_anded &= mb->m_flags;
+
+		if (offset <= mb->m_len)
+			break;
+		offset -= mb->m_len;
+	}
+	MPASS(mb != NULL || offset == 0);
+
+	if ((m_flags_ored ^ m_flags_anded) & M_DECRYPTED)
+		return (KTLS_MBUF_CRYPTO_ST_MIXED);
+	else
+		return ((m_flags_ored & M_DECRYPTED) ?
+		    KTLS_MBUF_CRYPTO_ST_DECRYPTED :
+		    KTLS_MBUF_CRYPTO_ST_ENCRYPTED);
+}
+
+/*
+ * ktls_resync_ifnet - get HW TLS RX back on track after packet loss
+ */
+static int
+ktls_resync_ifnet(struct socket *so, uint32_t tls_len, uint64_t tls_rcd_num)
+{
+	union if_snd_tag_modify_params params;
+	struct m_snd_tag *mst;
+	struct inpcb *inp;
+	struct tcpcb *tp;
+
+	mst = so->so_rcv.sb_tls_info->snd_tag;
+	if (__predict_false(mst == NULL))
+		return (EINVAL);
+
+	inp = sotoinpcb(so);
+	if (__predict_false(inp == NULL))
+		return (EINVAL);
+
+	INP_RLOCK(inp);
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		INP_RUNLOCK(inp);
+		return (ECONNRESET);
+	}
+
+	tp = intotcpcb(inp);
+	MPASS(tp != NULL);
+
+	/* Get the TCP sequence number of the next valid TLS header. */
+	SOCKBUF_LOCK(&so->so_rcv);
+	params.tls_rx.tls_hdr_tcp_sn =
+	    tp->rcv_nxt - so->so_rcv.sb_tlscc - tls_len;
+	params.tls_rx.tls_rec_length = tls_len;
+	params.tls_rx.tls_seq_number = tls_rcd_num;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+
+	INP_RUNLOCK(inp);
+
+	MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RX);
+	return (mst->sw->snd_tag_modify(mst, &params));
+}
+
 static void
 ktls_decrypt(struct socket *so)
 {
@@ -2002,6 +2317,7 @@ ktls_decrypt(struct socket *so)
 	struct tls_record_layer *hdr;
 	struct tls_get_record tgr;
 	struct mbuf *control, *data, *m;
+	ktls_mbuf_crypto_st_t state;
 	uint64_t seqno;
 	int error, remain, tls_len, trail_len;
 	bool tls13;
@@ -2074,13 +2390,46 @@ ktls_decrypt(struct socket *so)
 		SBCHECK(sb);
 		SOCKBUF_UNLOCK(sb);
 
-		error = ktls_ocf_decrypt(tls, hdr, data, seqno, &trail_len);
-		if (error == 0) {
-			if (tls13)
+		/* get crypto state for this TLS record */
+		state = ktls_mbuf_crypto_state(data, 0, tls_len);
+
+		switch (state) {
+		case KTLS_MBUF_CRYPTO_ST_MIXED:
+			error = ktls_ocf_recrypt(tls, hdr, data, seqno);
+			if (error)
+				break;
+			/* FALLTHROUGH */
+		case KTLS_MBUF_CRYPTO_ST_ENCRYPTED:
+			error = ktls_ocf_decrypt(tls, hdr, data, seqno,
+			    &trail_len);
+			if (__predict_true(error == 0)) {
+				if (tls13) {
+					error = tls13_find_record_type(tls, data,
+					    tls_len, &trail_len, &record_type);
+				} else {
+					record_type = hdr->tls_type;
+				}
+			}
+			break;
+		case KTLS_MBUF_CRYPTO_ST_DECRYPTED:
+			/*
+			 * NIC TLS is only supported for AEAD
+			 * ciphersuites which used a fixed sized
+			 * trailer.
+			 */
+			if (tls13) {
+				trail_len = tls->params.tls_tlen - 1;
 				error = tls13_find_record_type(tls, data,
 				    tls_len, &trail_len, &record_type);
-			else
+			} else {
+				trail_len = tls->params.tls_tlen;
+				error = 0;
 				record_type = hdr->tls_type;
+			}
+			break;
+		default:
+			error = EINVAL;
+			break;
 		}
 		if (error) {
 			counter_u64_add(ktls_offload_failed_crypto, 1);
@@ -2161,19 +2510,31 @@ ktls_decrypt(struct socket *so)
 			remain = be16toh(tgr.tls_length);
 			m = data;
 			for (m = data; remain > m->m_len; m = m->m_next) {
-				m->m_flags &= ~M_NOTREADY;
+				m->m_flags &= ~(M_NOTREADY | M_DECRYPTED);
 				remain -= m->m_len;
 			}
 			m->m_len = remain;
 			m_freem(m->m_next);
 			m->m_next = NULL;
-			m->m_flags &= ~M_NOTREADY;
+			m->m_flags &= ~(M_NOTREADY | M_DECRYPTED);
 
 			/* Set EOR on the final mbuf. */
 			m->m_flags |= M_EOR;
 		}
 
 		sbappendcontrol_locked(sb, data, control, 0);
+
+		if (__predict_false(state != KTLS_MBUF_CRYPTO_ST_DECRYPTED)) {
+			sb->sb_flags |= SB_TLS_RX_RESYNC;
+			SOCKBUF_UNLOCK(sb);
+			ktls_resync_ifnet(so, tls_len, seqno);
+			SOCKBUF_LOCK(sb);
+		} else if (__predict_false(sb->sb_flags & SB_TLS_RX_RESYNC)) {
+			sb->sb_flags &= ~SB_TLS_RX_RESYNC;
+			SOCKBUF_UNLOCK(sb);
+			ktls_resync_ifnet(so, 0, seqno);
+			SOCKBUF_LOCK(sb);
+		}
 	}
 
 	sb->sb_flags &= ~SB_TLS_RX_RUNNING;
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 07c21d3b7802..da02de3e3301 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -54,6 +54,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
+#include <netinet/in.h>
+
 /*
  * Function pointer set by the AIO routines so that the socket buffer code
  * can call back into the AIO module if it is loaded.
@@ -946,17 +948,45 @@ sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
 static void
 sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
 {
+	struct ifnet *ifp;
 	struct mbuf *n;
+	int flags;
+
+	ifp = NULL;
+	flags = M_NOTREADY;
 
 	SBLASTMBUFCHK(sb);
 
+	/* Mbuf chain must start with a packet header. */
+	MPASS((m->m_flags & M_PKTHDR) != 0);
+
 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
-	m_demote(m, 1, 0);
+	for (n = m; n != NULL; n = n->m_next) {
+		if (n->m_flags & M_PKTHDR) {
+			ifp = m->m_pkthdr.leaf_rcvif;
+			if ((n->m_pkthdr.csum_flags & CSUM_TLS_MASK) ==
+			    CSUM_TLS_DECRYPTED) {
+				/* Mark all mbufs in this packet decrypted. */
+				flags = M_NOTREADY | M_DECRYPTED;
+			} else {
+				flags = M_NOTREADY;
+			}
+			m_demote_pkthdr(n);
+		}
+
+		n->m_flags &= M_DEMOTEFLAGS;
+		n->m_flags |= flags;
+
+		MPASS((n->m_flags & M_NOTREADY) != 0);
+	}
 
-	for (n = m; n != NULL; n = n->m_next)
-		n->m_flags |= M_NOTREADY;
 	sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
 	ktls_check_rx(sb);
+
+	/* Check for incoming packet route changes: */
+	if (ifp != NULL && sb->sb_tls_info->rx_ifp != NULL &&
+	    sb->sb_tls_info->rx_ifp != ifp)
+		ktls_input_ifp_mismatch(sb, ifp);
 }
 #endif
 
@@ -1410,7 +1440,8 @@ sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 		if (n &&
 		    M_WRITABLE(n) &&
 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
-		    !(n->m_flags & (M_EXTPG)) &&
+		    !((m->m_flags ^ n->m_flags) & M_DECRYPTED) &&
+		    !(n->m_flags & M_EXTPG) &&
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n)) {
 			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h
index 885e4dc38ef9..66b510ddfe40 100644
--- a/sys/sys/ktls.h
+++ b/sys/sys/ktls.h
@@ -187,7 +187,12 @@ struct ktls_session {
 
 	struct task reset_tag_task;
 	struct task disable_ifnet_task;
-	struct inpcb *inp;
+	union {
+		struct inpcb *inp;	/* Used by transmit tasks. */
+		struct socket *so;	/* Used by receive task. */
+	};
+	struct ifnet *rx_ifp;
+	u_short rx_vlan_id;
 	bool reset_pending;
 	bool disable_ifnet_pending;
 	bool sync_dispatch;
@@ -200,7 +205,14 @@ struct ktls_session {
 
 extern unsigned int ktls_ifnet_max_rexmit_pct;
 
+typedef enum {
+	KTLS_MBUF_CRYPTO_ST_MIXED = 0,
+	KTLS_MBUF_CRYPTO_ST_ENCRYPTED = 1,
+	KTLS_MBUF_CRYPTO_ST_DECRYPTED = -1,
+} ktls_mbuf_crypto_st_t;
+
 void ktls_check_rx(struct sockbuf *sb);
+ktls_mbuf_crypto_st_t ktls_mbuf_crypto_state(struct mbuf *mb, int offset, int len);
 void ktls_disable_ifnet(void *arg);
 int ktls_enable_rx(struct socket *so, struct tls_enable *en);
 int ktls_enable_tx(struct socket *so, struct tls_enable *en);
@@ -215,6 +227,7 @@ int ktls_get_rx_mode(struct socket *so, int *modep);
 int ktls_set_tx_mode(struct socket *so, int mode);
 int ktls_get_tx_mode(struct socket *so, int *modep);
 int ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq);
+void ktls_input_ifp_mismatch(struct sockbuf *sb, struct ifnet *ifp);
 int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls);
 #ifdef RATELIMIT
 int ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate);
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index a108e19b3ff3..31c351860a94 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -53,6 +53,7 @@
 #define	SB_STOP		0x1000		/* backpressure indicator */
 #define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
 #define	SB_TLS_IFNET	0x4000		/* has used / is using ifnet KTLS */
+#define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */
 
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */