git: 5fd1a67e885e - main - inpcb: Release the inpcb cred reference before freeing the structure

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Thu, 20 Apr 2023 16:13:54 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=5fd1a67e885e834cda8f1d122e9b0f9d47977e54

commit 5fd1a67e885e834cda8f1d122e9b0f9d47977e54
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2023-04-20 15:48:33 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-04-20 16:13:06 +0000

    inpcb: Release the inpcb cred reference before freeing the structure
    
    Now that the inp_cred pointer is accessed only while the inpcb lock is
    held, we can avoid deferring a crfree() call when freeing an inpcb.
    
    This fixes a problem introduced when inpcb hash tables started being
    synchronized with SMR: the credential reference previously could not be
    released until all lockless readers have drained, and there is no
    mechanism to explicitly purge cached, freed UMA items.  Thus, ucred
    references could linger indefinitely, and since ucreds hold a jail
    reference, the jail would linger indefinitely as well.  This manifests
    as jails getting stuck in the DYING state.
    
    Discussed with: glebius
    Tested by:      glebius
    Sponsored by:   Klara, Inc.
    Sponsored by:   Modirum MDPay
    Differential Revision:  https://reviews.freebsd.org/D38573
---
 sys/netinet/in_pcb.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index ea36d684a82b..4c1ba835a066 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -548,7 +548,6 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
 /*
  * Initialize a pcbstorage - per protocol zones to allocate inpcbs.
  */
-static void inpcb_dtor(void *, int, void *);
 static void inpcb_fini(void *, int);
 void
 in_pcbstorage_init(void *arg)
@@ -556,7 +555,7 @@ in_pcbstorage_init(void *arg)
 	struct inpcbstorage *pcbstor = arg;
 
 	pcbstor->ips_zone = uma_zcreate(pcbstor->ips_zone_name,
-	    pcbstor->ips_size, NULL, inpcb_dtor, pcbstor->ips_pcbinit,
+	    pcbstor->ips_size, NULL, NULL, pcbstor->ips_pcbinit,
 	    inpcb_fini, UMA_ALIGN_CACHE, UMA_ZONE_SMR);
 	pcbstor->ips_portzone = uma_zcreate(pcbstor->ips_portzone_name,
 	    sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
@@ -1694,6 +1693,10 @@ in_pcbrele_rlocked(struct inpcb *inp)
 	MPASS(inp->inp_flags & INP_FREED);
 	MPASS(inp->inp_socket == NULL);
 	MPASS(inp->inp_in_hpts == 0);
+	crfree(inp->inp_cred);
+#ifdef INVARIANTS
+	inp->inp_cred = NULL;
+#endif
 	INP_RUNLOCK(inp);
 	uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
 	return (true);
@@ -1711,6 +1714,10 @@ in_pcbrele_wlocked(struct inpcb *inp)
 	MPASS(inp->inp_flags & INP_FREED);
 	MPASS(inp->inp_socket == NULL);
 	MPASS(inp->inp_in_hpts == 0);
+	crfree(inp->inp_cred);
+#ifdef INVARIANTS
+	inp->inp_cred = NULL;
+#endif
 	INP_WUNLOCK(inp);
 	uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
 	return (true);
@@ -1788,18 +1795,6 @@ in_pcbfree(struct inpcb *inp)
 #endif
 #ifdef INET
 	inp_freemoptions(imo);
-#endif
-	/* Destruction is finalized in inpcb_dtor(). */
-}
-
-static void
-inpcb_dtor(void *mem, int size, void *arg)
-{
-	struct inpcb *inp = mem;
-
-	crfree(inp->inp_cred);
-#ifdef INVARIANTS
-	inp->inp_cred = NULL;
 #endif
 }