git: b5d43972e394 - main - vfs: decouple freevnodes from vnode batching

From: Mateusz Guzik <mjg_at_FreeBSD.org>
Date: Wed, 22 Mar 2023 23:57:28 UTC
The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=b5d43972e3941d6c03d935dc423385f3c2178d68

commit b5d43972e3941d6c03d935dc423385f3c2178d68
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2023-03-21 07:27:25 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2023-03-22 23:57:25 +0000

    vfs: decouple freevnodes from vnode batching
    
    In principle one cpu can keep vholding vnodes, while another vdrops
    them. In this case it may be the local count will keep growing in an
    unbounded manner. Roll it up after a threshold instead.
    
    While here move it out of dpcpu into struct pcpu.
    
    Reviewed by:    kib (previous version)
    Differential Revision:  https://reviews.freebsd.org/D39195
---
 sys/kern/vfs_subr.c | 55 ++++++++++++++++++++++++++++++++---------------------
 sys/sys/pcpu.h      |  3 ++-
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index c117ada017c9..d89e0ed13dd5 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -287,7 +287,6 @@ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
 #define	VDBATCH_SIZE 8
 struct vdbatch {
 	u_int index;
-	long freevnodes;
 	struct mtx lock;
 	struct vnode *tab[VDBATCH_SIZE];
 };
@@ -1418,48 +1417,62 @@ static int vnlruproc_sig;
  * at any given moment can still exceed slop, but it should not be by significant
  * margin in practice.
  */
-#define VNLRU_FREEVNODES_SLOP 128
+#define VNLRU_FREEVNODES_SLOP 126
+
+static void __noinline
+vfs_freevnodes_rollup(int8_t *lfreevnodes)
+{
+
+	atomic_add_long(&freevnodes, *lfreevnodes);
+	*lfreevnodes = 0;
+	critical_exit();
+}
 
 static __inline void
 vfs_freevnodes_inc(void)
 {
-	struct vdbatch *vd;
+	int8_t *lfreevnodes;
 
 	critical_enter();
-	vd = DPCPU_PTR(vd);
-	vd->freevnodes++;
-	critical_exit();
+	lfreevnodes = PCPU_PTR(vfs_freevnodes);
+	(*lfreevnodes)++;
+	if (__predict_false(*lfreevnodes == VNLRU_FREEVNODES_SLOP))
+		vfs_freevnodes_rollup(lfreevnodes);
+	else
+		critical_exit();
 }
 
 static __inline void
 vfs_freevnodes_dec(void)
 {
-	struct vdbatch *vd;
+	int8_t *lfreevnodes;
 
 	critical_enter();
-	vd = DPCPU_PTR(vd);
-	vd->freevnodes--;
-	critical_exit();
+	lfreevnodes = PCPU_PTR(vfs_freevnodes);
+	(*lfreevnodes)--;
+	if (__predict_false(*lfreevnodes == -VNLRU_FREEVNODES_SLOP))
+		vfs_freevnodes_rollup(lfreevnodes);
+	else
+		critical_exit();
 }
 
 static u_long
 vnlru_read_freevnodes(void)
 {
-	struct vdbatch *vd;
-	long slop;
+	long slop, rfreevnodes;
 	int cpu;
 
-	mtx_assert(&vnode_list_mtx, MA_OWNED);
-	if (freevnodes > freevnodes_old)
-		slop = freevnodes - freevnodes_old;
+	rfreevnodes = atomic_load_long(&freevnodes);
+
+	if (rfreevnodes > freevnodes_old)
+		slop = rfreevnodes - freevnodes_old;
 	else
-		slop = freevnodes_old - freevnodes;
+		slop = freevnodes_old - rfreevnodes;
 	if (slop < VNLRU_FREEVNODES_SLOP)
-		return (freevnodes >= 0 ? freevnodes : 0);
-	freevnodes_old = freevnodes;
+		return (rfreevnodes >= 0 ? rfreevnodes : 0);
+	freevnodes_old = rfreevnodes;
 	CPU_FOREACH(cpu) {
-		vd = DPCPU_ID_PTR((cpu), vd);
-		freevnodes_old += vd->freevnodes;
+		freevnodes_old += cpuid_to_pcpu[cpu]->pc_vfs_freevnodes;
 	}
 	return (freevnodes_old >= 0 ? freevnodes_old : 0);
 }
@@ -3513,7 +3526,6 @@ vdbatch_process(struct vdbatch *vd)
 
 	mtx_lock(&vnode_list_mtx);
 	critical_enter();
-	freevnodes += vd->freevnodes;
 	for (i = 0; i < VDBATCH_SIZE; i++) {
 		vp = vd->tab[i];
 		TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
@@ -3522,7 +3534,6 @@ vdbatch_process(struct vdbatch *vd)
 		vp->v_dbatchcpu = NOCPU;
 	}
 	mtx_unlock(&vnode_list_mtx);
-	vd->freevnodes = 0;
 	bzero(vd->tab, sizeof(vd->tab));
 	vd->index = 0;
 	critical_exit();
diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h
index 14bef5cf690e..9ac4144d3267 100644
--- a/sys/sys/pcpu.h
+++ b/sys/sys/pcpu.h
@@ -189,7 +189,8 @@ struct pcpu {
 	long		pc_cp_time[CPUSTATES];	/* statclock ticks */
 	struct _device	*pc_device;		/* CPU device handle */
 	void		*pc_netisr;		/* netisr SWI cookie */
-	int		pc_unused1;		/* unused field */
+	int8_t		pc_vfs_freevnodes;	/* freevnodes counter */
+	char		pc_unused1[3];		/* unused pad */
 	int		pc_domain;		/* Memory domain. */
 	struct rm_queue	pc_rm_queue;		/* rmlock list of trackers */
 	uintptr_t	pc_dynamic;		/* Dynamic per-cpu data area */