svn commit: r268852 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys
Robert Watson
rwatson at FreeBSD.org
Sat Jul 19 10:42:38 UTC 2014
On Fri, 18 Jul 2014, Xin LI wrote:
> Log:
> 5008 lock contention (rrw_exit) while running a read only load
> Reviewed by: Matthew Ahrens <matthew.ahrens at delphix.com>
> Reviewed by: George Wilson <george.wilson at delphix.com>
> Reviewed by: Alex Reece <alex.reece at delphix.com>
> Reviewed by: Christopher Siden <christopher.siden at delphix.com>
> Reviewed by: Richard Yao <ryao at gentoo.org>
> Reviewed by: Saso Kiselkov <skiselkov.ml at gmail.com>
> Approved by: Garrett D'Amore <garrett at damore.org>
>
> illumos/illumos-gate at c9030f6c93613fe30ee0c16f92b96da7816ac052
Is there an opportunity to use our own read-mostly lock implementation here?
It should be substantially more scalable, and it comes with WITNESS
integration, proper priority propagation, etc.
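For reference, the rm(9) consumer pattern is roughly the following. This is a
hypothetical sketch to show the shape of the API (the names and functions in
it are mine, not a proposed patch):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

static struct rmlock example_rm;	/* hypothetical lock, for illustration */

static void
example_init(void)
{

	rm_init(&example_rm, "example_rm");
}

static void
example_read_path(void)
{
	struct rm_priotracker tracker;	/* per-acquisition, caller-stack state */

	rm_rlock(&example_rm, &tracker);
	/* Read-side critical section: cheap and scalable across CPUs. */
	rm_runlock(&example_rm, &tracker);
}

static void
example_write_path(void)
{

	rm_wlock(&example_rm);
	/* The write side is expensive, much as in the rrm design below. */
	rm_wunlock(&example_rm);
}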
Robert
>
> Modified:
> vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c
> vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h
> vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
> vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h
> vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
> vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c
> vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c Fri Jul 18 18:05:09 2014 (r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c Fri Jul 18 18:09:20 2014 (r268852)
> @@ -286,3 +286,91 @@ rrw_tsd_destroy(void *arg)
> (void *)curthread, (void *)rn->rn_rrl);
> }
> }
> +
> +/*
> + * A reader-mostly lock implementation, layered on top of the
> + * reader-writer locks above and tuned for highly parallel read
> + * acquisitions at the cost of slower writes.
> + *
> + * The idea is to split a single busy lock into an array of locks, so
> + * that each reader locks only one of them for read, chosen by a simple
> + * hash function.  That reduces lock contention proportionally.  A
> + * writer, by contrast, has to acquire write on all of the locks in
> + * sequence, which makes write acquisition proportionally slower; in
> + * the place where it is used (filesystem unmount) write performance is
> + * not critical.
> + *
> + * All the functions below are direct wrappers around functions above.
> + */
> +void
> +rrm_init(rrmlock_t *rrl, boolean_t track_all)
> +{
> + int i;
> +
> + for (i = 0; i < RRM_NUM_LOCKS; i++)
> + rrw_init(&rrl->locks[i], track_all);
> +}
> +
> +void
> +rrm_destroy(rrmlock_t *rrl)
> +{
> + int i;
> +
> + for (i = 0; i < RRM_NUM_LOCKS; i++)
> + rrw_destroy(&rrl->locks[i]);
> +}
> +
> +void
> +rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag)
> +{
> + if (rw == RW_READER)
> + rrm_enter_read(rrl, tag);
> + else
> + rrm_enter_write(rrl);
> +}
> +
> +/*
> + * This maps the current thread to a specific lock. Note that the lock
> + * must be released by the same thread that acquired it. We do this
> + * mapping by taking the thread pointer mod a prime number. We examine
> + * only the low 32 bits of the thread pointer, because 32-bit division
> + * is faster than 64-bit division, and the high 32 bits have little
> + * entropy anyway.
> + */
> +#define RRM_TD_LOCK() (((uint32_t)(uintptr_t)(curthread)) % RRM_NUM_LOCKS)
> +
> +void
> +rrm_enter_read(rrmlock_t *rrl, void *tag)
> +{
> + rrw_enter_read(&rrl->locks[RRM_TD_LOCK()], tag);
> +}
> +
> +void
> +rrm_enter_write(rrmlock_t *rrl)
> +{
> + int i;
> +
> + for (i = 0; i < RRM_NUM_LOCKS; i++)
> + rrw_enter_write(&rrl->locks[i]);
> +}
> +
> +void
> +rrm_exit(rrmlock_t *rrl, void *tag)
> +{
> + int i;
> +
> + if (rrl->locks[0].rr_writer == curthread) {
> + for (i = 0; i < RRM_NUM_LOCKS; i++)
> + rrw_exit(&rrl->locks[i], tag);
> + } else {
> + rrw_exit(&rrl->locks[RRM_TD_LOCK()], tag);
> + }
> +}
> +
> +boolean_t
> +rrm_held(rrmlock_t *rrl, krw_t rw)
> +{
> + if (rw == RW_WRITER) {
> + return (rrw_held(&rrl->locks[0], rw));
> + } else {
> + return (rrw_held(&rrl->locks[RRM_TD_LOCK()], rw));
> + }
> +}
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h Fri Jul 18 18:05:09 2014 (r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h Fri Jul 18 18:09:20 2014 (r268852)
> @@ -80,6 +80,31 @@ void rrw_tsd_destroy(void *arg);
> #define RRW_LOCK_HELD(x) \
> (rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
>
> +/*
> + * A reader-mostly lock implementation, layered on top of the
> + * reader-writer locks above and tuned for highly parallel read
> + * acquisitions at the cost of slower write acquisitions.
> + *
> + * This should be a prime number. See comment in rrwlock.c near
> + * RRM_TD_LOCK() for details.
> + */
> +#define RRM_NUM_LOCKS 17
> +typedef struct rrmlock {
> + rrwlock_t locks[RRM_NUM_LOCKS];
> +} rrmlock_t;
> +
> +void rrm_init(rrmlock_t *rrl, boolean_t track_all);
> +void rrm_destroy(rrmlock_t *rrl);
> +void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag);
> +void rrm_enter_read(rrmlock_t *rrl, void *tag);
> +void rrm_enter_write(rrmlock_t *rrl);
> +void rrm_exit(rrmlock_t *rrl, void *tag);
> +boolean_t rrm_held(rrmlock_t *rrl, krw_t rw);
> +
> +#define RRM_READ_HELD(x) rrm_held(x, RW_READER)
> +#define RRM_WRITE_HELD(x) rrm_held(x, RW_WRITER)
> +#define RRM_LOCK_HELD(x) \
> + (rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER))
> +
> #ifdef __cplusplus
> }
> #endif
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h Fri Jul 18 18:05:09 2014 (r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h Fri Jul 18 18:09:20 2014 (r268852)
> @@ -64,7 +64,7 @@ struct zfsvfs {
> int z_norm; /* normalization flags */
> boolean_t z_atime; /* enable atimes mount option */
> boolean_t z_unmounted; /* unmounted */
> - rrwlock_t z_teardown_lock;
> + rrmlock_t z_teardown_lock;
> krwlock_t z_teardown_inactive_lock;
> list_t z_all_znodes; /* all vnodes in the fs */
> kmutex_t z_znodes_lock; /* lock for z_all_znodes */
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h Fri Jul 18 18:05:09 2014 (r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h Fri Jul 18 18:09:20 2014 (r268852)
> @@ -238,7 +238,7 @@ typedef struct znode {
> /* Called on entry to each ZFS vnode and vfs operation */
> #define ZFS_ENTER(zfsvfs) \
> { \
> - rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
> + rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
> if ((zfsvfs)->z_unmounted) { \
> ZFS_EXIT(zfsvfs); \
> return (EIO); \
> @@ -246,7 +246,7 @@ typedef struct znode {
> }
>
> /* Must be called before exiting the vop */
> -#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
> +#define ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
>
> /* Verifies the znode is valid */
> #define ZFS_VERIFY_ZP(zp) \
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c Fri Jul 18 18:05:09 2014 (r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c Fri Jul 18 18:09:20 2014 (r268852)
> @@ -1420,7 +1420,7 @@ zfsvfs_hold(const char *name, void *tag,
> if (getzfsvfs(name, zfvp) != 0)
> error = zfsvfs_create(name, zfvp);
> if (error == 0) {
> - rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
> + rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
> RW_READER, tag);
> if ((*zfvp)->z_unmounted) {
> /*
> @@ -1428,7 +1428,7 @@ zfsvfs_hold(const char *name, void *tag,
> * thread should be just about to disassociate the
> * objset from the zfsvfs.
> */
> - rrw_exit(&(*zfvp)->z_teardown_lock, tag);
> + rrm_exit(&(*zfvp)->z_teardown_lock, tag);
> return (SET_ERROR(EBUSY));
> }
> }
> @@ -1438,7 +1438,7 @@ zfsvfs_hold(const char *name, void *tag,
> static void
> zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
> {
> - rrw_exit(&zfsvfs->z_teardown_lock, tag);
> + rrm_exit(&zfsvfs->z_teardown_lock, tag);
>
> if (zfsvfs->z_vfs) {
> VFS_RELE(zfsvfs->z_vfs);
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c Fri Jul 18 18:05:09 2014 (r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c Fri Jul 18 18:09:20 2014 (r268852)
> @@ -1004,7 +1004,7 @@ zfsvfs_create(const char *osname, zfsvfs
> mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
> list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
> offsetof(znode_t, z_link_node));
> - rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
> + rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
> rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
> rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
> for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
> @@ -1119,7 +1119,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
> mutex_destroy(&zfsvfs->z_znodes_lock);
> mutex_destroy(&zfsvfs->z_lock);
> list_destroy(&zfsvfs->z_all_znodes);
> - rrw_destroy(&zfsvfs->z_teardown_lock);
> + rrm_destroy(&zfsvfs->z_teardown_lock);
> rw_destroy(&zfsvfs->z_teardown_inactive_lock);
> rw_destroy(&zfsvfs->z_fuid_lock);
> for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
> @@ -1784,7 +1784,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
> {
> znode_t *zp;
>
> - rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
> + rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
>
> if (!unmounting) {
> /*
> @@ -1814,7 +1814,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
> */
> if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
> rw_exit(&zfsvfs->z_teardown_inactive_lock);
> - rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
> + rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
> return (SET_ERROR(EIO));
> }
>
> @@ -1841,7 +1841,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
> */
> if (unmounting) {
> zfsvfs->z_unmounted = B_TRUE;
> - rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
> + rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
> rw_exit(&zfsvfs->z_teardown_inactive_lock);
> }
>
> @@ -2073,7 +2073,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const ch
> znode_t *zp;
> uint64_t sa_obj = 0;
>
> - ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
> + ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
> ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
>
> /*
> @@ -2129,7 +2129,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const ch
> bail:
> /* release the VOPs */
> rw_exit(&zfsvfs->z_teardown_inactive_lock);
> - rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
> + rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
>
> if (err) {
> /*
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c Fri Jul 18 18:05:09 2014 (r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c Fri Jul 18 18:09:20 2014 (r268852)
> @@ -276,7 +276,7 @@ zfs_znode_move(void *buf, void *newbuf,
> * can safely ensure that the filesystem is not and will not be
> * unmounted. The next statement is equivalent to ZFS_ENTER().
> */
> - rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
> + rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
> if (zfsvfs->z_unmounted) {
> ZFS_EXIT(zfsvfs);
> rw_exit(&zfsvfs_lock);
>
>
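For anyone reading along: the striping idea in the quoted rrwlock.c comment
boils down to something like the following userland sketch with POSIX
rwlocks. The names are illustrative only, and the sketch omits the read
re-entrancy tracking the real rrw locks provide:

#include <pthread.h>
#include <stdint.h>

#define NUM_LOCKS	17	/* prime, mirroring RRM_NUM_LOCKS */

typedef struct {
	pthread_rwlock_t	locks[NUM_LOCKS];
} striped_lock_t;

/*
 * Map the calling thread to one stripe, as RRM_TD_LOCK() does; this
 * assumes pthread_t is a scalar (pointer or integer) type.
 */
static unsigned
stripe_self(void)
{
	return ((uint32_t)(uintptr_t)pthread_self() % NUM_LOCKS);
}

static void
striped_init(striped_lock_t *sl)
{
	for (int i = 0; i < NUM_LOCKS; i++)
		pthread_rwlock_init(&sl->locks[i], NULL);
}

static void
striped_rdlock(striped_lock_t *sl)
{
	/* Readers touch a single stripe, dividing contention by ~17. */
	pthread_rwlock_rdlock(&sl->locks[stripe_self()]);
}

static void
striped_rdunlock(striped_lock_t *sl)
{
	/* Must be called from the thread that took the read lock. */
	pthread_rwlock_unlock(&sl->locks[stripe_self()]);
}

static void
striped_wrlock(striped_lock_t *sl)
{
	/* Writers sweep all stripes in order, so writes are ~17x slower. */
	for (int i = 0; i < NUM_LOCKS; i++)
		pthread_rwlock_wrlock(&sl->locks[i]);
}

static void
striped_wrunlock(striped_lock_t *sl)
{
	for (int i = 0; i < NUM_LOCKS; i++)
		pthread_rwlock_unlock(&sl->locks[i]);
}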