svn commit: r356673 - in head/sys: kern sys
Mateusz Guzik
mjguzik at gmail.com
Mon Jan 13 02:40:26 UTC 2020
On 1/13/20, Mateusz Guzik <mjg at freebsd.org> wrote:
> Author: mjg
> Date: Mon Jan 13 02:39:41 2020
> New Revision: 356673
> URL: https://svnweb.freebsd.org/changeset/base/356673
>
> Log:
> vfs: per-cpu batched requeuing of free vnodes
>
> Constant requeuing adds significant lock contention in certain
> workloads. Lessen the problem by batching it.
>
> Per-cpu areas are locked in order to synchronize against UMA freeing
> memory.
>
> vnode's v_mflag is converted to short to prevent the struct from
> growing.
>
> Sample result from an incremental make -s -j 104 bzImage on tmpfs:
> stock: 122.38s user 1780.45s system 6242% cpu 30.480 total
> patched: 144.84s user 985.90s system 4856% cpu 23.282 total
>
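In other words, system time drops from ~1780s to ~986s (roughly 45% less)
and wall-clock time from 30.5s to 23.3s (about 24% faster), at the cost of
somewhat higher user time.
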
> Reviewed by: jeff
That should read: jeff (previous version)
> Tested by: pho (in a larger patch, previous version)
> Differential Revision: https://reviews.freebsd.org/D22998
>
> Modified:
> head/sys/kern/vfs_subr.c
> head/sys/sys/vnode.h
>
> Modified: head/sys/kern/vfs_subr.c
> ==============================================================================
> --- head/sys/kern/vfs_subr.c Mon Jan 13 02:37:25 2020 (r356672)
> +++ head/sys/kern/vfs_subr.c Mon Jan 13 02:39:41 2020 (r356673)
> @@ -295,6 +295,16 @@ static int stat_rush_requests; /* number of times I/O
> SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW,
> &stat_rush_requests, 0,
> "Number of times I/O speeded up (rush requests)");
>
> +#define VDBATCH_SIZE 8
> +struct vdbatch {
> + u_int index;
> + struct mtx lock;
> + struct vnode *tab[VDBATCH_SIZE];
> +};
> +DPCPU_DEFINE_STATIC(struct vdbatch, vd);
> +
> +static void vdbatch_dequeue(struct vnode *vp);
> +
> /*
> * When shutting down the syncer, run it at four times normal speed.
> */
> @@ -552,6 +562,8 @@ vnode_init(void *mem, int size, int flags)
> */
> rangelock_init(&vp->v_rl);
>
> + vp->v_dbatchcpu = NOCPU;
> +
> mtx_lock(&vnode_list_mtx);
> TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist);
> mtx_unlock(&vnode_list_mtx);
> @@ -568,6 +580,7 @@ vnode_fini(void *mem, int size)
> struct bufobj *bo;
>
> vp = mem;
> + vdbatch_dequeue(vp);
> mtx_lock(&vnode_list_mtx);
> TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
> mtx_unlock(&vnode_list_mtx);
> @@ -602,8 +615,9 @@ vnode_fini(void *mem, int size)
> static void
> vntblinit(void *dummy __unused)
> {
> + struct vdbatch *vd;
> + int cpu, physvnodes, virtvnodes;
> u_int i;
> - int physvnodes, virtvnodes;
>
> /*
> * Desiredvnodes is a function of the physical memory size and the
> @@ -669,6 +683,12 @@ vntblinit(void *dummy __unused)
> for (i = 1; i <= sizeof(struct vnode); i <<= 1)
> vnsz2log++;
> vnsz2log--;
> +
> + CPU_FOREACH(cpu) {
> + vd = DPCPU_ID_PTR((cpu), vd);
> + bzero(vd, sizeof(*vd));
> + mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF);
> + }
> }
> SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
>
> @@ -3199,7 +3219,99 @@ vholdnz(struct vnode *vp)
> #endif
> }
>
> +static void __noinline
> +vdbatch_process(struct vdbatch *vd)
> +{
> + struct vnode *vp;
> + int i;
> +
> + mtx_assert(&vd->lock, MA_OWNED);
> + MPASS(vd->index == VDBATCH_SIZE);
> +
> + mtx_lock(&vnode_list_mtx);
> + for (i = 0; i < VDBATCH_SIZE; i++) {
> + vp = vd->tab[i];
> + TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
> + TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
> + MPASS(vp->v_dbatchcpu != NOCPU);
> + vp->v_dbatchcpu = NOCPU;
> + }
> + bzero(vd->tab, sizeof(vd->tab));
> + vd->index = 0;
> + mtx_unlock(&vnode_list_mtx);
> +}
> +
> +static void
> +vdbatch_enqueue(struct vnode *vp)
> +{
> + struct vdbatch *vd;
> +
> + ASSERT_VI_LOCKED(vp, __func__);
> + VNASSERT(!VN_IS_DOOMED(vp), vp,
> + ("%s: deferring requeue of a doomed vnode", __func__));
> +
> + if (vp->v_dbatchcpu != NOCPU) {
> + VI_UNLOCK(vp);
> + return;
> + }
> +
> + /*
> + * A hack: pin us to the current CPU so that we know what to put in
> + * ->v_dbatchcpu.
> + */
> + sched_pin();
> + vd = DPCPU_PTR(vd);
> + mtx_lock(&vd->lock);
> + MPASS(vd->index < VDBATCH_SIZE);
> + MPASS(vd->tab[vd->index] == NULL);
> + vp->v_dbatchcpu = curcpu;
> + vd->tab[vd->index] = vp;
> + vd->index++;
> + VI_UNLOCK(vp);
> + if (vd->index == VDBATCH_SIZE)
> + vdbatch_process(vd);
> + mtx_unlock(&vd->lock);
> + sched_unpin();
> +}
> +
> /*
> + * This routine must only be called for vnodes which are about to be
> * deallocated. Supporting dequeue for arbitrary vnodes would require
> + * validating that the locked batch matches.
> + */
> +static void
> +vdbatch_dequeue(struct vnode *vp)
> +{
> + struct vdbatch *vd;
> + int i;
> + short cpu;
> +
> + VNASSERT(vp->v_type == VBAD || vp->v_type == VNON, vp,
> + ("%s: called for a used vnode\n", __func__));
> +
> + cpu = vp->v_dbatchcpu;
> + if (cpu == NOCPU)
> + return;
> +
> + vd = DPCPU_ID_PTR(cpu, vd);
> + mtx_lock(&vd->lock);
> + for (i = 0; i < vd->index; i++) {
> + if (vd->tab[i] != vp)
> + continue;
> + vp->v_dbatchcpu = NOCPU;
> + vd->index--;
> + vd->tab[i] = vd->tab[vd->index];
> + vd->tab[vd->index] = NULL;
> + break;
> + }
> + mtx_unlock(&vd->lock);
> + /*
> + * Either we dequeued the vnode above or the target CPU beat us to it.
> + */
> + MPASS(vp->v_dbatchcpu == NOCPU);
> +}
> +
> +/*
> * Drop the hold count of the vnode. If this is the last reference to
> * the vnode we place it on the free list unless it has been vgone'd
> * (marked VIRF_DOOMED) in which case we will free it.
> @@ -3236,12 +3348,8 @@ vdrop_deactivate(struct vnode *vp)
> mp->mnt_lazyvnodelistsize--;
> mtx_unlock(&mp->mnt_listmtx);
> }
> - mtx_lock(&vnode_list_mtx);
> - TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
> - TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
> - mtx_unlock(&vnode_list_mtx);
> atomic_add_long(&freevnodes, 1);
> - VI_UNLOCK(vp);
> + vdbatch_enqueue(vp);
> }
>
> void
>
> Modified: head/sys/sys/vnode.h
> ==============================================================================
> --- head/sys/sys/vnode.h Mon Jan 13 02:37:25 2020 (r356672)
> +++ head/sys/sys/vnode.h Mon Jan 13 02:39:41 2020 (r356673)
> @@ -171,7 +171,8 @@ struct vnode {
> u_int v_usecount; /* I ref count of users */
> u_int v_iflag; /* i vnode flags (see below) */
> u_int v_vflag; /* v vnode flags */
> - u_int v_mflag; /* l mnt-specific vnode flags */
> + u_short v_mflag; /* l mnt-specific vnode flags */
> + short v_dbatchcpu; /* i LRU requeue deferral batch */
> int v_writecount; /* I ref count of writers or
> (negative) text users */
> u_int v_hash;
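
For anyone skimming the patch, the flow is roughly the following (a
condensed sketch of the code above, not literal source):

    /* last hold count dropped on a live vnode */
    vdrop_deactivate(vp)
        atomic_add_long(&freevnodes, 1);
        vdbatch_enqueue(vp);            /* instead of requeuing right away */

    vdbatch_enqueue(vp)
        sched_pin();                    /* stay on one CPU to pick its batch */
        vd = DPCPU_PTR(vd);
        mtx_lock(&vd->lock);
        vp->v_dbatchcpu = curcpu;
        vd->tab[vd->index++] = vp;
        VI_UNLOCK(vp);
        if (vd->index == VDBATCH_SIZE)
            vdbatch_process(vd);        /* one vnode_list_mtx acquisition
                                           requeues all 8 vnodes */
        mtx_unlock(&vd->lock);
        sched_unpin();

    vnode_fini(vp)                      /* UMA is about to free the memory */
        vdbatch_dequeue(vp);            /* drop any stale batch reference;
                                           this is why the per-CPU areas
                                           are locked */
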
--
Mateusz Guzik <mjguzik gmail.com>