git: bd3813a8e1cf - stable/13 - cred: crsetgroups(): Improve and factor out groups normalization

From: Olivier Certner <olce_at_FreeBSD.org>
Date: Fri, 15 Nov 2024 13:00:50 UTC
The branch stable/13 has been updated by olce:

URL: https://cgit.FreeBSD.org/src/commit/?id=bd3813a8e1cf3e1766283f4f9e7fe085222cd32a

commit bd3813a8e1cf3e1766283f4f9e7fe085222cd32a
Author:     Olivier Certner <olce@FreeBSD.org>
AuthorDate: 2024-10-02 12:40:27 +0000
Commit:     Olivier Certner <olce@FreeBSD.org>
CommitDate: 2024-11-15 12:59:08 +0000

    cred: crsetgroups(): Improve and factor out groups normalization
    
    The groups array has been sorted (not including the first element, which
    is always the effective GID) to enable performing a binary search for
    determining if some group is part of the supplementary groups set.
    
    Factor out this sorting operation into an internal normalization
    function (groups_normalize()), adding to it the removal of duplicates
    after the sort.
    
    Separating groups normalization code allows to perform it in advance,
    and in particular before calling MAC hooks which need the groups array
    to be sorted to perform better.  This also enables sorting input arrays
    ahead of acquiring the process lock (which is not necessary for this
    operation).
    
    kern_setgroups() has been changed accordingly, so MAC modules
    implementing the mac_cred_check_setgroups() hook now can assume
    a normalized groups array (and also that it has at least one element, as
    if kern_setgroups() is passed no groups, the hook is called with an
    array of one element being the current effective GID, as this is
    effectively the effect of such a call to kern_setgroups()).  Further
    commits introducing the setcred() system call and associated MAC hooks
    will also guarantee a normalized groups array to MAC modules
    implementing these hooks.
    
    Rename crsetgroups_locked() into crsetgroups_internal(), as it is no
    more "locked" than crsetgroups() itself.  However, it can be called
    under any lock (as needed), whereas the second may sleep to allocate
    memory.  Update their herald comments to make that explicit.
    
    In passing, using qsort() instead of the old open-coded insertion sort
    (in crsetgroups_locked()) fixes the performance concern about the latter
    when using a large number of groups.  Also, our qsort() falls back to
    insertion sort for small arrays and in case the array is likely to be
    mostly sorted, so this shouldn't cause concerns for the small number of
    groups common case.
    
    While here, add assertions in inner modification routines to check that
    the passed credentials object has a reference count of exactly 1 (in
    particular, it must not be shared).  Remove a redundant one from some
    outer routine.
    
    Reviewed by:    mhorne
    Approved by:    markj (mentor)
    MFC after:      3 days
    Differential Revision:  https://reviews.freebsd.org/D46914
    
    (cherry picked from commit 6d2efbb34fdb59facbe6d83374ef4ab69d395866)
    
    Approved by:    markj (mentor)
---
 sys/kern/kern_prot.c  | 168 ++++++++++++++++++++++++++++++++++++++------------
 sys/sys/syscallsubr.h |   2 +-
 sys/sys/ucred.h       |   2 +-
 3 files changed, 131 insertions(+), 41 deletions(-)

diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index 901753f1e5b7..6cdfea2774c0 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -89,8 +89,22 @@ SYSCTL_NODE(_security, OID_AUTO, bsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "BSD security policy");
 
 static void crfree_final(struct ucred *cr);
-static void crsetgroups_locked(struct ucred *cr, int ngrp,
-    gid_t *groups);
+
+static inline void
+groups_check_positive_len(int ngrp)
+{
+	MPASS2(ngrp >= 0, "negative number of groups");
+	MPASS2(ngrp != 0, "at least one group expected (effective GID)");
+}
+static inline void
+groups_check_max_len(int ngrp)
+{
+	MPASS2(ngrp <= ngroups_max + 1, "too many groups");
+}
+
+static void groups_normalize(int *ngrp, gid_t *groups);
+static void crsetgroups_internal(struct ucred *cr, int ngrp,
+    const gid_t *groups);
 
 #ifndef _SYS_SYSPROTO_H_
 struct getpid_args {
@@ -831,9 +845,9 @@ sys_setgroups(struct thread *td, struct setgroups_args *uap)
 
 	error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t));
 	if (error == 0)
-		error = kern_setgroups(td, gidsetsize, groups);
+		error = kern_setgroups(td, &gidsetsize, groups);
 
-	if (gidsetsize > CRED_SMALLGROUPS_NB)
+	if (groups != smallgroups)
 		free(groups, M_TEMP);
 	return (error);
 }
@@ -847,25 +861,38 @@ gidp_cmp(const void *p1, const void *p2)
 	return ((g1 > g2) - (g1 < g2));
 }
 
+/*
+ * CAUTION: This function normalizes 'groups', possibly also changing the value
+ * of '*ngrpp' as a consequence.
+ */
 int
-kern_setgroups(struct thread *td, int ngrp, gid_t *groups)
+kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
-	int error;
+	int ngrp, error;
 
+	ngrp = *ngrpp;
 	/* Sanity check size. */
 	if (ngrp < 0 || ngrp > ngroups_max + 1)
 		return (EINVAL);
 
 	AUDIT_ARG_GROUPSET(groups, ngrp);
+	if (ngrp != 0) {
+		/* We allow and treat 0 specially below. */
+		groups_normalize(ngrpp, groups);
+		ngrp = *ngrpp;
+	}
 	newcred = crget();
 	crextend(newcred, ngrp);
 	PROC_LOCK(p);
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
-	error = mac_cred_check_setgroups(oldcred, ngrp, groups);
+	error = ngrp == 0 ?
+	    /* If 'ngrp' is 0, we'll keep just the current effective GID. */
+	    mac_cred_check_setgroups(oldcred, 1, oldcred->cr_groups) :
+	    mac_cred_check_setgroups(oldcred, ngrp, groups);
 	if (error)
 		goto fail;
 #endif
@@ -882,9 +909,9 @@ kern_setgroups(struct thread *td, int ngrp, gid_t *groups)
 		 * when running non-BSD software if we do not do the same.
 		 */
 		newcred->cr_ngroups = 1;
-	} else {
-		crsetgroups_locked(newcred, ngrp, groups);
-	}
+	} else
+		crsetgroups_internal(newcred, ngrp, groups);
+
 	setsugid(p);
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
@@ -1294,6 +1321,32 @@ sys___setugid(struct thread *td, struct __setugid_args *uap)
 #endif /* REGRESSION */
 }
 
+#ifdef INVARIANTS
+static void
+groups_check_normalized(int ngrp, const gid_t *groups)
+{
+	gid_t prev_g;
+
+	groups_check_positive_len(ngrp);
+	groups_check_max_len(ngrp);
+
+	if (ngrp == 1)
+		return;
+
+	prev_g = groups[1];
+	for (int i = 2; i < ngrp; ++i) {
+		const gid_t g = groups[i];
+
+		if (prev_g >= g)
+			panic("%s: groups[%d] (%u) >= groups[%d] (%u)",
+			    __func__, i - 1, prev_g, i, g);
+		prev_g = g;
+	}
+}
+#else
+#define groups_check_normalized(...)
+#endif
+
 /*
  * Returns whether gid designates a supplementary group in cred.
  */
@@ -2156,7 +2209,6 @@ void
 crcopy(struct ucred *dest, struct ucred *src)
 {
 
-	KASSERT(dest->cr_ref == 1, ("crcopy of shared ucred"));
 	bcopy(&src->cr_startcopy, &dest->cr_startcopy,
 	    (unsigned)((caddr_t)&src->cr_endcopy -
 		(caddr_t)&src->cr_startcopy));
@@ -2301,6 +2353,8 @@ crextend(struct ucred *cr, int n)
 {
 	int cnt;
 
+	MPASS2(cr->cr_ref == 1, "'cr_ref' must be 1 (referenced, unshared)");
+
 	/* Truncate? */
 	if (n <= cr->cr_agroups)
 		return;
@@ -2334,52 +2388,88 @@ crextend(struct ucred *cr, int n)
 }
 
 /*
- * Copy groups in to a credential, preserving any necessary invariants.
- * Currently this includes the sorting of all supplementary gids.
- * crextend() must have been called before hand to ensure sufficient
- * space is available.
+ * Normalizes a set of groups to be applied to a 'struct ucred'.
+ *
+ * The set of groups is an array that must comprise the effective GID as its
+ * first element (so its length cannot be 0).
+ *
+ * Normalization ensures that elements after the first, which stand for the
+ * supplementary groups, are sorted in ascending order and do not contain
+ * duplicates.
  */
 static void
-crsetgroups_locked(struct ucred *cr, int ngrp, gid_t *groups)
+groups_normalize(int *ngrp, gid_t *groups)
 {
-	int i;
-	int j;
-	gid_t g;
+	gid_t prev_g;
+	int ins_idx;
 
-	KASSERT(cr->cr_agroups >= ngrp, ("cr_ngroups is too small"));
+	groups_check_positive_len(*ngrp);
+	groups_check_max_len(*ngrp);
 
-	bcopy(groups, cr->cr_groups, ngrp * sizeof(gid_t));
-	cr->cr_ngroups = ngrp;
+	if (*ngrp == 1)
+		return;
 
-	/*
-	 * Sort all groups except cr_groups[0] to allow groupmember to
-	 * perform a binary search.
-	 *
-	 * XXX: If large numbers of groups become common this should
-	 * be replaced with shell sort like linux uses or possibly
-	 * heap sort.
-	 */
-	for (i = 2; i < ngrp; i++) {
-		g = cr->cr_groups[i];
-		for (j = i-1; j >= 1 && g < cr->cr_groups[j]; j--)
-			cr->cr_groups[j + 1] = cr->cr_groups[j];
-		cr->cr_groups[j + 1] = g;
+	qsort(groups + 1, *ngrp - 1, sizeof(*groups), gidp_cmp);
+
+	/* Remove duplicates. */
+	prev_g = groups[1];
+	ins_idx = 2;
+	for (int i = 2; i < *ngrp; ++i) {
+		const gid_t g = groups[i];
+
+		if (g != prev_g) {
+			if (i != ins_idx)
+				groups[ins_idx] = g;
+			++ins_idx;
+			prev_g = g;
+		}
 	}
+	*ngrp = ins_idx;
+
+	groups_check_normalized(*ngrp, groups);
+}
+
+/*
+ * Internal function copying groups into a credential.
+ *
+ * 'ngrp' must be strictly positive.  Either the passed 'groups' array must have
+ * been normalized in advance (see groups_normalize()), else it must be so
+ * before the structure is to be used again.
+ *
+ * This function is suitable to be used under any lock (it doesn't take any lock
+ * itself nor sleep, and in particular doesn't allocate memory).  crextend()
+ * must have been called beforehand to ensure sufficient space is available.
+ * See also crsetgroups(), which handles that.
+ */
+static void
+crsetgroups_internal(struct ucred *cr, int ngrp, const gid_t *groups)
+{
+
+	MPASS2(cr->cr_ref == 1, "'cr_ref' must be 1 (referenced, unshared)");
+	MPASS2(cr->cr_agroups >= ngrp, "'cr_agroups' too small");
+	groups_check_positive_len(ngrp);
+
+	bcopy(groups, cr->cr_groups, ngrp * sizeof(gid_t));
+	cr->cr_ngroups = ngrp;
 }
 
 /*
  * Copy groups in to a credential after expanding it if required.
- * Truncate the list to (ngroups_max + 1) if it is too large.
+ *
+ * May sleep in order to allocate memory (except if, e.g., crextend() was called
+ * before with 'ngrp' or greater).  Truncates the list to (ngroups_max + 1) if
+ * it is too large.  Array 'groups' doesn't need to be sorted.  'ngrp' must be
+ * strictly positive.
  */
 void
-crsetgroups(struct ucred *cr, int ngrp, gid_t *groups)
+crsetgroups(struct ucred *cr, int ngrp, const gid_t *groups)
 {
 
 	if (ngrp > ngroups_max + 1)
 		ngrp = ngroups_max + 1;
-
 	crextend(cr, ngrp);
-	crsetgroups_locked(cr, ngrp, groups);
+	crsetgroups_internal(cr, ngrp, groups);
+	groups_normalize(&cr->cr_ngroups, cr->cr_groups);
 }
 
 /*
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index 9edd62729c38..78de8876e51b 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -287,7 +287,7 @@ int	kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
 	    fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits);
 int	kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
 	    struct mbuf *control, enum uio_seg segflg);
-int	kern_setgroups(struct thread *td, int ngrp, gid_t *groups);
+int	kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups);
 int	kern_setitimer(struct thread *, u_int, struct itimerval *,
 	    struct itimerval *);
 int	kern_setpriority(struct thread *td, int which, int who, int prio);
diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h
index 243a2431bd0b..8152f962f3cb 100644
--- a/sys/sys/ucred.h
+++ b/sys/sys/ucred.h
@@ -165,7 +165,7 @@ struct ucred	*crcowget(struct ucred *cr);
 void	crcowfree(struct thread *td);
 void	cru2x(struct ucred *cr, struct xucred *xcr);
 void	cru2xt(struct thread *td, struct xucred *xcr);
-void	crsetgroups(struct ucred *cr, int n, gid_t *groups);
+void	crsetgroups(struct ucred *cr, int ngrp, const gid_t *groups);
 
 /*
  * Returns whether gid designates a primary group in cred.