svn commit: r330987 - in stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Andriy Gapon
avg at FreeBSD.org
Thu Mar 15 11:00:56 UTC 2018
Author: avg
Date: Thu Mar 15 11:00:55 2018
New Revision: 330987
URL: https://svnweb.freebsd.org/changeset/base/330987
Log:
MFC r322245,r329717: MFV r322242: 8373 TXG_WAIT in ZIL commit path
MFC r322245: MFV r322242: 8373 TXG_WAIT in ZIL commit path
MFC r329717: MFV r329715: 8997 ztest assertion failure in zil_lwb_write_issue
Modified:
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c Thu Mar 15 10:52:08 2018 (r330986)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c Thu Mar 15 11:00:55 2018 (r330987)
@@ -1159,7 +1159,7 @@ dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
}
static int
-dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
+dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
dmu_tx_hold_t *txh;
spa_t *spa = tx->tx_pool->dp_spa;
@@ -1182,13 +1182,13 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
* of the failuremode setting.
*/
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
- txg_how != TXG_WAIT)
+ !(txg_how & TXG_WAIT))
return (SET_ERROR(EIO));
return (SET_ERROR(ERESTART));
}
- if (!tx->tx_waited &&
+ if (!tx->tx_dirty_delayed &&
dsl_pool_need_dirty_delay(tx->tx_pool)) {
tx->tx_wait_dirty = B_TRUE;
return (SET_ERROR(ERESTART));
@@ -1308,41 +1308,44 @@ dmu_tx_unassign(dmu_tx_t *tx)
}
/*
- * Assign tx to a transaction group. txg_how can be one of:
+ * Assign tx to a transaction group; txg_how is a bitmask:
*
- * (1) TXG_WAIT. If the current open txg is full, waits until there's
- * a new one. This should be used when you're not holding locks.
- * It will only fail if we're truly out of space (or over quota).
+ * If TXG_WAIT is set and the currently open txg is full, this function
+ * will wait until there's a new txg. This should be used when no locks
+ * are being held. With this bit set, this function will only fail if
+ * we're truly out of space (or over quota).
*
- * (2) TXG_NOWAIT. If we can't assign into the current open txg without
- * blocking, returns immediately with ERESTART. This should be used
- * whenever you're holding locks. On an ERESTART error, the caller
- * should drop locks, do a dmu_tx_wait(tx), and try again.
+ * If TXG_WAIT is *not* set and we can't assign into the currently open
+ * txg without blocking, this function will return immediately with
+ * ERESTART. This should be used whenever locks are being held. On an
+ * ERESTART error, the caller should drop all locks, call dmu_tx_wait(),
+ * and try again.
*
- * (3) TXG_WAITED. Like TXG_NOWAIT, but indicates that dmu_tx_wait()
- * has already been called on behalf of this operation (though
- * most likely on a different tx).
+ * If TXG_NOTHROTTLE is set, this indicates that this tx should not be
+ * delayed due on the ZFS Write Throttle (see comments in dsl_pool.c for
+ * details on the throttle). This is used by the VFS operations, after
+ * they have already called dmu_tx_wait() (though most likely on a
+ * different tx).
*/
int
-dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
+dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
{
int err;
ASSERT(tx->tx_txg == 0);
- ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT ||
- txg_how == TXG_WAITED);
+ ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE));
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
/* If we might wait, we must not hold the config lock. */
- ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
+ IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool));
- if (txg_how == TXG_WAITED)
- tx->tx_waited = B_TRUE;
+ if ((txg_how & TXG_NOTHROTTLE))
+ tx->tx_dirty_delayed = B_TRUE;
while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
dmu_tx_unassign(tx);
- if (err != ERESTART || txg_how != TXG_WAIT)
+ if (err != ERESTART || !(txg_how & TXG_WAIT))
return (err);
dmu_tx_wait(tx);
@@ -1379,12 +1382,12 @@ dmu_tx_wait(dmu_tx_t *tx)
tx->tx_wait_dirty = B_FALSE;
/*
- * Note: setting tx_waited only has effect if the caller
- * used TX_WAIT. Otherwise they are going to destroy
- * this tx and try again. The common case, zfs_write(),
- * uses TX_WAIT.
+ * Note: setting tx_dirty_delayed only has effect if the
+ * caller used TX_WAIT. Otherwise they are going to
+ * destroy this tx and try again. The common case,
+ * zfs_write(), uses TX_WAIT.
*/
- tx->tx_waited = B_TRUE;
+ tx->tx_dirty_delayed = B_TRUE;
} else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
/*
* If the pool is suspended we need to wait until it
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Thu Mar 15 10:52:08 2018 (r330986)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Thu Mar 15 11:00:55 2018 (r330987)
@@ -230,11 +230,14 @@ typedef enum dmu_object_type {
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
} dmu_object_type_t;
-typedef enum txg_how {
- TXG_WAIT = 1,
- TXG_NOWAIT,
- TXG_WAITED,
-} txg_how_t;
+/*
+ * These flags are intended to be used to specify the "txg_how"
+ * parameter when calling the dmu_tx_assign() function. See the comment
+ * above dmu_tx_assign() for more details on the meaning of these flags.
+ */
+#define TXG_NOWAIT (0ULL)
+#define TXG_WAIT (1ULL<<0)
+#define TXG_NOTHROTTLE (1ULL<<1)
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
@@ -676,7 +679,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
-int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
+int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_mark_netfree(dmu_tx_t *tx);
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h Thu Mar 15 10:52:08 2018 (r330986)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h Thu Mar 15 11:00:55 2018 (r330987)
@@ -66,15 +66,15 @@ struct dmu_tx {
/* placeholder for syncing context, doesn't need specific holds */
boolean_t tx_anyobj;
- /* has this transaction already been delayed? */
- boolean_t tx_waited;
-
/* time this transaction was created */
hrtime_t tx_start;
/* need to wait for sufficient dirty space */
boolean_t tx_wait_dirty;
+ /* has this transaction already been delayed? */
+ boolean_t tx_dirty_delayed;
+
int tx_err;
#ifdef ZFS_DEBUG
uint64_t tx_space_towrite;
@@ -124,7 +124,7 @@ typedef struct dmu_tx_callback {
* These routines are defined in dmu.h, and are called by the user.
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
-int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
+int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Thu Mar 15 10:52:08 2018 (r330986)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Thu Mar 15 11:00:55 2018 (r330987)
@@ -125,7 +125,7 @@
*
* If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
* then drop all locks, call dmu_tx_wait(), and try again. On subsequent
- * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
+ * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
* to indicate that this operation has already called dmu_tx_wait().
* This will ensure that we don't retry forever, waiting a short bit
* each time.
@@ -150,7 +150,7 @@
* rw_enter(...); // grab any other locks you need
* tx = dmu_tx_create(...); // get DMU tx
* dmu_tx_hold_*(); // hold each object you might modify
- * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
* if (error) {
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c Thu Mar 15 10:52:08 2018 (r330986)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c Thu Mar 15 11:00:55 2018 (r330987)
@@ -994,7 +994,15 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb, boolea
* to clean up in the event of allocation failure or I/O failure.
*/
tx = dmu_tx_create(zilog->zl_os);
- VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0);
+
+ /*
+ * Since we are not going to create any new dirty data, and we
+ * can even help with clearing the existing dirty data, we
+ * should not be subject to the dirty data based delays. We
+ * use TXG_NOTHROTTLE to bypass the delay mechanism.
+ */
+ VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
+
dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
txg = dmu_tx_get_txg(tx);
More information about the svn-src-stable-10
mailing list