svn commit: r248575 - in head/sys/cddl: compat/opensolaris/sys contrib/opensolaris/uts/common/fs/zfs
Steven Hartland
smh at FreeBSD.org
Thu Mar 21 10:29:06 UTC 2013
Author: smh
Date: Thu Mar 21 10:29:05 2013
New Revision: 248575
URL: http://svnweb.freebsd.org/changeset/base/248575
Log:
TRIM cache devices based on time instead of TXGs.
Currently, the trim module uses the same algorithm for data and cache
devices when deciding to issue TRIM requests, based on how far in the
past the TXG is.
Unfortunately, this is not ideal for cache devices, because the L2ARC
doesn't use the concept of TXGs at all. In fact, when using a pool for
reading only, the L2ARC is written but the TXG counter doesn't
increase, and so no new TRIM requests are issued to the cache device.
This patch fixes the issue by using time instead of the TXG number as
the criterion for trimming on cache devices. The basic delay principle
stays the same, but parameters are expressed in seconds instead of
TXGs. The new parameters are named trim_l2arc_limit and
trim_l2arc_batch, and both default to 30 seconds.
Reviewed by: pjd (mentor)
Approved by: pjd (mentor)
Obtained from: https://github.com/dechamps/zfs/commit/17122c31ac7f82875e837019205c21651c05f8cd
MFC after: 2 weeks
Modified:
head/sys/cddl/compat/opensolaris/sys/time.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
Modified: head/sys/cddl/compat/opensolaris/sys/time.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/time.h Thu Mar 21 10:16:10 2013 (r248574)
+++ head/sys/cddl/compat/opensolaris/sys/time.h Thu Mar 21 10:29:05 2013 (r248575)
@@ -35,6 +35,7 @@
#define MILLISEC 1000
#define MICROSEC 1000000
#define NANOSEC 1000000000
+#define TIME_MAX LLONG_MAX
typedef longlong_t hrtime_t;
Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c Thu Mar 21 10:16:10 2013 (r248574)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c Thu Mar 21 10:29:05 2013 (r248575)
@@ -27,6 +27,7 @@
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/trim_map.h>
+#include <sys/time.h>
/*
* Calculate the zio end, upgrading based on ashift which would be
@@ -54,6 +55,7 @@ typedef struct trim_seg {
uint64_t ts_start; /* Starting offset of this segment. */
uint64_t ts_end; /* Ending offset (non-inclusive). */
uint64_t ts_txg; /* Segment creation txg. */
+ hrtime_t ts_time; /* Segment creation time. */
} trim_seg_t;
extern boolean_t zfs_notrim;
@@ -65,6 +67,11 @@ TUNABLE_INT("vfs.zfs.trim_txg_limit", &t
SYSCTL_INT(_vfs_zfs, OID_AUTO, trim_txg_limit, CTLFLAG_RW, &trim_txg_limit, 0,
"Delay TRIMs by that many TXGs.");
+static int trim_l2arc_limit = 30;
+TUNABLE_INT("vfs.zfs.trim_l2arc_limit", &trim_l2arc_limit);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, trim_l2arc_limit, CTLFLAG_RWTUN, &trim_l2arc_limit, 0,
+ "Delay TRIMs by this many seconds for cache devices.");
+
static void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);
static int
@@ -176,10 +183,12 @@ trim_map_segment_add(trim_map_t *tm, uin
avl_index_t where;
trim_seg_t tsearch, *ts_before, *ts_after, *ts;
boolean_t merge_before, merge_after;
+ hrtime_t time;
ASSERT(MUTEX_HELD(&tm->tm_lock));
VERIFY(start < end);
+ time = gethrtime();
tsearch.ts_start = start;
tsearch.ts_end = end;
@@ -214,6 +223,7 @@ trim_map_segment_add(trim_map_t *tm, uin
ts->ts_start = start;
ts->ts_end = end;
ts->ts_txg = txg;
+ ts->ts_time = time;
avl_insert(&tm->tm_queued_frees, ts, where);
list_insert_tail(&tm->tm_head, ts);
}
@@ -236,6 +246,7 @@ trim_map_segment_remove(trim_map_t *tm,
nts->ts_start = end;
nts->ts_end = ts->ts_end;
nts->ts_txg = ts->ts_txg;
+ nts->ts_time = ts->ts_time;
ts->ts_end = start;
avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
list_insert_after(&tm->tm_head, ts, nts);
@@ -359,17 +370,18 @@ trim_map_write_done(zio_t *zio)
/*
* Return the oldest segment (the one with the lowest txg) or false if
* the list is empty or the first element's txg is greater than txg given
- * as function argument.
+ * as function argument, or the first element's time is greater than time
+ * given as function argument
*/
static trim_seg_t *
-trim_map_first(trim_map_t *tm, uint64_t txg)
+trim_map_first(trim_map_t *tm, uint64_t txg, hrtime_t time)
{
trim_seg_t *ts;
ASSERT(MUTEX_HELD(&tm->tm_lock));
ts = list_head(&tm->tm_head);
- if (ts != NULL && ts->ts_txg <= txg)
+ if (ts != NULL && ts->ts_txg <= txg && ts->ts_time <= time)
return (ts);
return (NULL);
}
@@ -380,20 +392,28 @@ trim_map_vdev_commit(spa_t *spa, zio_t *
trim_map_t *tm = vd->vdev_trimmap;
trim_seg_t *ts;
uint64_t start, size, txglimit;
+ hrtime_t timelimit;
ASSERT(vd->vdev_ops->vdev_op_leaf);
if (tm == NULL)
return;
- txglimit = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)) -
- trim_txg_limit;
+ if (vd->vdev_isl2cache) {
+ timelimit = gethrtime() - trim_l2arc_limit * NANOSEC;
+ txglimit = UINT64_MAX;
+ } else {
+ timelimit = TIME_MAX;
+ txglimit = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)) -
+ trim_txg_limit;
+ }
mutex_enter(&tm->tm_lock);
/*
- * Loop until we send all frees up to the txglimit.
+ * Loop until we send all frees up to the txglimit
+ * or time limit if this is a cache device.
*/
- while ((ts = trim_map_first(tm, txglimit)) != NULL) {
+ while ((ts = trim_map_first(tm, txglimit, timelimit)) != NULL) {
list_remove(&tm->tm_head, ts);
avl_remove(&tm->tm_queued_frees, ts);
avl_add(&tm->tm_inflight_frees, ts);
More information about the svn-src-head
mailing list