svn commit: r356673 - in head/sys: kern sys
Mateusz Guzik
mjguzik at gmail.com
Mon Jan 13 02:40:26 UTC 2020
On 1/13/20, Mateusz Guzik <mjg at freebsd.org> wrote:
> Author: mjg
> Date: Mon Jan 13 02:39:41 2020
> New Revision: 356673
> URL: https://svnweb.freebsd.org/changeset/base/356673
>
> Log:
> vfs: per-cpu batched requeuing of free vnodes
>
> Constant requeuing adds significant lock contention in certain
> workloads. Lessen the problem by batching it.
>
> Per-cpu areas are locked in order to synchronize against UMA freeing
> memory.
>
> vnode's v_mflag is converted to short to prevent the struct from
> growing.
>
> Sample result from an incremental make -s -j 104 bzImage on tmpfs:
> stock: 122.38s user 1780.45s system 6242% cpu 30.480 total
> patched: 144.84s user 985.90s system 4856% cpu 23.282 total
>
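In other words, system time drops from ~1780s to ~986s (roughly 45% less)
and wall-clock time from 30.5s to 23.3s (about 24% faster), at the cost of
somewhat higher user time.
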
> Reviewed by: jeff
That should read: jeff (previous version)
> Tested by: pho (in a larger patch, previous version)
> Differential Revision: https://reviews.freebsd.org/D22998
>
> Modified:
> head/sys/kern/vfs_subr.c
> head/sys/sys/vnode.h
>
> Modified: head/sys/kern/vfs_subr.c
> ==============================================================================
> --- head/sys/kern/vfs_subr.c Mon Jan 13 02:37:25 2020 (r356672)
> +++ head/sys/kern/vfs_subr.c Mon Jan 13 02:39:41 2020 (r356673)
> @@ -295,6 +295,16 @@ static int stat_rush_requests; /* number of times I/O
> SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW,
> &stat_rush_requests, 0,
> "Number of times I/O speeded up (rush requests)");
>
> +#define VDBATCH_SIZE 8
> +struct vdbatch {
> + u_int index;
> + struct mtx lock;
> + struct vnode *tab[VDBATCH_SIZE];
> +};
> +DPCPU_DEFINE_STATIC(struct vdbatch, vd);
> +
> +static void vdbatch_dequeue(struct vnode *vp);
> +
> /*
> * When shutting down the syncer, run it at four times normal speed.
> */
> @@ -552,6 +562,8 @@ vnode_init(void *mem, int size, int flags)
> */
> rangelock_init(&vp->v_rl);
>
> + vp->v_dbatchcpu = NOCPU;
> +
> mtx_lock(&vnode_list_mtx);
> TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist);
> mtx_unlock(&vnode_list_mtx);
> @@ -568,6 +580,7 @@ vnode_fini(void *mem, int size)
> struct bufobj *bo;
>
> vp = mem;
> + vdbatch_dequeue(vp);
> mtx_lock(&vnode_list_mtx);
> TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
> mtx_unlock(&vnode_list_mtx);
> @@ -602,8 +615,9 @@ vnode_fini(void *mem, int size)
> static void
> vntblinit(void *dummy __unused)
> {
> + struct vdbatch *vd;
> + int cpu, physvnodes, virtvnodes;
> u_int i;
> - int physvnodes, virtvnodes;
>
> /*
> * Desiredvnodes is a function of the physical memory size and the
> @@ -669,6 +683,12 @@ vntblinit(void *dummy __unused)
> for (i = 1; i <= sizeof(struct vnode); i <<= 1)
> vnsz2log++;
> vnsz2log--;
> +
> + CPU_FOREACH(cpu) {
> + vd = DPCPU_ID_PTR((cpu), vd);
> + bzero(vd, sizeof(*vd));
> + mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF);
> + }
> }
> SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
>
> @@ -3199,7 +3219,99 @@ vholdnz(struct vnode *vp)
> #endif
> }
>
> +static void __noinline
> +vdbatch_process(struct vdbatch *vd)
> +{
> + struct vnode *vp;
> + int i;
> +
> + mtx_assert(&vd->lock, MA_OWNED);
> + MPASS(vd->index == VDBATCH_SIZE);
> +
> + mtx_lock(&vnode_list_mtx);
> + for (i = 0; i < VDBATCH_SIZE; i++) {
> + vp = vd->tab[i];
> + TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
> + TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
> + MPASS(vp->v_dbatchcpu != NOCPU);
> + vp->v_dbatchcpu = NOCPU;
> + }
> + bzero(vd->tab, sizeof(vd->tab));
> + vd->index = 0;
> + mtx_unlock(&vnode_list_mtx);
> +}
> +
> +static void
> +vdbatch_enqueue(struct vnode *vp)
> +{
> + struct vdbatch *vd;
> +
> + ASSERT_VI_LOCKED(vp, __func__);
> + VNASSERT(!VN_IS_DOOMED(vp), vp,
> + ("%s: deferring requeue of a doomed vnode", __func__));
> +
> + if (vp->v_dbatchcpu != NOCPU) {
> + VI_UNLOCK(vp);
> + return;
> + }
> +
> + /*
> + * A hack: pin us to the current CPU so that we know what to put in
> + * ->v_dbatchcpu.
> + */
> + sched_pin();
> + vd = DPCPU_PTR(vd);
> + mtx_lock(&vd->lock);
> + MPASS(vd->index < VDBATCH_SIZE);
> + MPASS(vd->tab[vd->index] == NULL);
> + vp->v_dbatchcpu = curcpu;
> + vd->tab[vd->index] = vp;
> + vd->index++;
> + VI_UNLOCK(vp);
> + if (vd->index == VDBATCH_SIZE)
> + vdbatch_process(vd);
> + mtx_unlock(&vd->lock);
> + sched_unpin();
> +}
> +
> /*
> + * This routine must only be called for vnodes which are about to be
> * deallocated. Supporting dequeue for arbitrary vnodes would require
> + * validating that the locked batch matches.
> + */
> +static void
> +vdbatch_dequeue(struct vnode *vp)
> +{
> + struct vdbatch *vd;
> + int i;
> + short cpu;
> +
> + VNASSERT(vp->v_type == VBAD || vp->v_type == VNON, vp,
> + ("%s: called for a used vnode\n", __func__));
> +
> + cpu = vp->v_dbatchcpu;
> + if (cpu == NOCPU)
> + return;
> +
> + vd = DPCPU_ID_PTR(cpu, vd);
> + mtx_lock(&vd->lock);
> + for (i = 0; i < vd->index; i++) {
> + if (vd->tab[i] != vp)
> + continue;
> + vp->v_dbatchcpu = NOCPU;
> + vd->index--;
> + vd->tab[i] = vd->tab[vd->index];
> + vd->tab[vd->index] = NULL;
> + break;
> + }
> + mtx_unlock(&vd->lock);
> + /*
> + * Either we dequeued the vnode above or the target CPU beat us to it.
> + */
> + MPASS(vp->v_dbatchcpu == NOCPU);
> +}
> +
> +/*
> * Drop the hold count of the vnode. If this is the last reference to
> * the vnode we place it on the free list unless it has been vgone'd
> * (marked VIRF_DOOMED) in which case we will free it.
> @@ -3236,12 +3348,8 @@ vdrop_deactivate(struct vnode *vp)
> mp->mnt_lazyvnodelistsize--;
> mtx_unlock(&mp->mnt_listmtx);
> }
> - mtx_lock(&vnode_list_mtx);
> - TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
> - TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
> - mtx_unlock(&vnode_list_mtx);
> atomic_add_long(&freevnodes, 1);
> - VI_UNLOCK(vp);
> + vdbatch_enqueue(vp);
> }
>
> void
>
> Modified: head/sys/sys/vnode.h
> ==============================================================================
> --- head/sys/sys/vnode.h Mon Jan 13 02:37:25 2020 (r356672)
> +++ head/sys/sys/vnode.h Mon Jan 13 02:39:41 2020 (r356673)
> @@ -171,7 +171,8 @@ struct vnode {
> u_int v_usecount; /* I ref count of users */
> u_int v_iflag; /* i vnode flags (see below) */
> u_int v_vflag; /* v vnode flags */
> - u_int v_mflag; /* l mnt-specific vnode flags */
> + u_short v_mflag; /* l mnt-specific vnode flags */
> + short v_dbatchcpu; /* i LRU requeue deferral batch */
> int v_writecount; /* I ref count of writers or
> (negative) text users */
> u_int v_hash;
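
For anyone skimming the patch, the flow is roughly the following (a
condensed sketch of the code above, not literal source):

    /* last hold count dropped on a live vnode */
    vdrop_deactivate(vp)
        atomic_add_long(&freevnodes, 1);
        vdbatch_enqueue(vp);            /* instead of requeuing right away */

    vdbatch_enqueue(vp)
        sched_pin();                    /* stay on one CPU to pick its batch */
        vd = DPCPU_PTR(vd);
        mtx_lock(&vd->lock);
        vp->v_dbatchcpu = curcpu;
        vd->tab[vd->index++] = vp;
        VI_UNLOCK(vp);
        if (vd->index == VDBATCH_SIZE)
            vdbatch_process(vd);        /* one vnode_list_mtx acquisition
                                           requeues all 8 vnodes */
        mtx_unlock(&vd->lock);
        sched_unpin();

    vnode_fini(vp)                      /* UMA is about to free the memory */
        vdbatch_dequeue(vp);            /* drop any stale batch reference;
                                           this is why the per-CPU areas
                                           are locked */
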
--
Mateusz Guzik <mjguzik gmail.com>