svn commit: r280296 - in user/delphij/zfs-arc-rebase: cddl/contrib/opensolaris/cmd/ztest sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys

Fri Mar 20 18:39:09 UTC 2015

Author: delphij
Date: Fri Mar 20 18:39:07 2015
New Revision: 280296
URL: https://svnweb.freebsd.org/changeset/base/280296

Log:
  MFV r277426:
  
  Reduce L2ARC memory footprint by extracting L1 and L2 only fields and only
  allocate the fields when needed.
  
  Illumos issue:
      5408 managing ZFS cache devices requires lots of RAM

Modified:
  user/delphij/zfs-arc-rebase/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
Directory Properties:
  user/delphij/zfs-arc-rebase/cddl/contrib/opensolaris/   (props changed)
  user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/   (props changed)

Modified: user/delphij/zfs-arc-rebase/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================

--- user/delphij/zfs-arc-rebase/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Fri Mar 20 18:02:56 2015	(r280295)
+++ user/delphij/zfs-arc-rebase/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Fri Mar 20 18:39:07 2015	(r280296)
@@ -3907,7 +3907,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *z
 		 * assign an arcbuf to a dbuf.
 		 */
 		for (j = 0; j < s; j++) {
-			if (i != 5) {
+			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
 				bigbuf_arcbufs[j] =
 				    dmu_request_arcbuf(bonus_db, chunksize);
 			} else {
@@ -3931,7 +3931,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *z
 			umem_free(packbuf, packsize);
 			umem_free(bigbuf, bigsize);
 			for (j = 0; j < s; j++) {
-				if (i != 5) {
+				if (i != 5 ||
+				    chunksize < (SPA_MINBLOCKSIZE * 2)) {
 					dmu_return_arcbuf(bigbuf_arcbufs[j]);
 				} else {
 					dmu_return_arcbuf(
@@ -3975,7 +3976,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *z
 		}
 		for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
 			dmu_buf_t *dbt;
-			if (i != 5) {
+			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
 				bcopy((caddr_t)bigbuf + (off - bigoff),
 				    bigbuf_arcbufs[j]->b_data, chunksize);
 			} else {
@@ -3992,7 +3993,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *z
 				VERIFY(dmu_buf_hold(os, bigobj, off,
 				    FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
 			}
-			if (i != 5) {
+			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
 				dmu_assign_arcbuf(bonus_db, off,
 				    bigbuf_arcbufs[j], tx);
 			} else {

Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Fri Mar 20 18:02:56 2015	(r280295)
+++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Fri Mar 20 18:39:07 2015	(r280296)
@@ -111,7 +111,7 @@
  * Note that the majority of the performance stats are manipulated
  * with atomic operations.
  *
- * The L2ARC uses the l2arc_buflist_mtx global mutex for the following:
+ * The L2ARC uses the l2ad_mtx on each vdev for the following:
  *
  *	- L2ARC buflist creation
  *	- L2ARC buflist eviction
@@ -390,6 +390,7 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_l2_writes_hdr_miss;
 	kstat_named_t arcstat_l2_evict_lock_retry;
 	kstat_named_t arcstat_l2_evict_reading;
+	kstat_named_t arcstat_l2_evict_l1cached;
 	kstat_named_t arcstat_l2_free_on_write;
 	kstat_named_t arcstat_l2_abort_lowmem;
 	kstat_named_t arcstat_l2_cksum_bad;
@@ -471,6 +472,7 @@ static arc_stats_t arc_stats = {
 	{ "l2_writes_hdr_miss",		KSTAT_DATA_UINT64 },
 	{ "l2_evict_lock_retry",	KSTAT_DATA_UINT64 },
 	{ "l2_evict_reading",		KSTAT_DATA_UINT64 },
+	{ "l2_evict_l1cached",		KSTAT_DATA_UINT64 },
 	{ "l2_free_on_write",		KSTAT_DATA_UINT64 },
 	{ "l2_abort_lowmem",		KSTAT_DATA_UINT64 },
 	{ "l2_cksum_bad",		KSTAT_DATA_UINT64 },
@@ -574,8 +576,6 @@ static int		arc_no_grow;	/* Don't try to
 static uint64_t		arc_tempreserve;
 static uint64_t		arc_loaned_bytes;
 
-typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
-
 typedef struct arc_callback arc_callback_t;
 
 struct arc_callback {
@@ -596,29 +596,53 @@ struct arc_write_callback {
 	arc_buf_t	*awcb_buf;
 };
 
-struct arc_buf_hdr {
-	/* protected by hash lock */
-	dva_t			b_dva;
-	uint64_t		b_birth;
-	uint64_t		b_cksum0;
-
+/*
+ * ARC buffers are separated into multiple structs as a memory saving measure:
+ *   - Common fields struct, always defined, and embedded within it:
+ *       - L2-only fields, always allocated but undefined when not in L2ARC
+ *       - L1-only fields, only allocated when in L1ARC
+ *
+ *           Buffer in L1                     Buffer only in L2
+ *    +------------------------+          +------------------------+
+ *    | arc_buf_hdr_t          |          | arc_buf_hdr_t          |
+ *    |                        |          |                        |
+ *    |                        |          |                        |
+ *    |                        |          |                        |
+ *    +------------------------+          +------------------------+
+ *    | l2arc_buf_hdr_t        |          | l2arc_buf_hdr_t        |
+ *    | (undefined if L1-only) |          |                        |
+ *    +------------------------+          +------------------------+
+ *    | l1arc_buf_hdr_t        |
+ *    |                        |
+ *    |                        |
+ *    |                        |
+ *    |                        |
+ *    +------------------------+
+ *
+ * Because it's possible for the L2ARC to become extremely large, we can wind
+ * up eating a lot of memory in L2ARC buffer headers, so the size of a header
+ * is minimized by only allocating the fields necessary for an L1-cached buffer
+ * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
+ * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
+ * words in pointers. arc_hdr_realloc() is used to switch a header between
+ * these two allocation states.
+ */
+typedef struct l1arc_buf_hdr {
 	kmutex_t		b_freeze_lock;
-	zio_cksum_t		*b_freeze_cksum;
+#ifdef ZFS_DEBUG
+	/*
+	 * used for debugging wtih kmem_flags - by allocating and freeing
+	 * b_thawed when the buffer is thawed, we get a record of the stack
+	 * trace that thawed it.
+	 */
 	void			*b_thawed;
+#endif
 
-	arc_buf_hdr_t		*b_hash_next;
 	arc_buf_t		*b_buf;
-	arc_flags_t		b_flags;
 	uint32_t		b_datacnt;
-
-	arc_callback_t		*b_acb;
+	/* for waiting on writes to complete */
 	kcondvar_t		b_cv;
 
-	/* immutable */
-	arc_buf_contents_t	b_type;
-	uint64_t		b_size;
-	uint64_t		b_spa;
-
 	/* protected by arc state mutex */
 	arc_state_t		*b_state;
 	list_node_t		b_arc_node;
@@ -629,8 +653,46 @@ struct arc_buf_hdr {
 	/* self protecting */
 	refcount_t		b_refcnt;
 
-	l2arc_buf_hdr_t		*b_l2hdr;
+	arc_callback_t		*b_acb;
+	/* temporary buffer holder for in-flight compressed data */
+	void			*b_tmp_cdata;
+} l1arc_buf_hdr_t;
+
+typedef struct l2arc_dev l2arc_dev_t;
+
+typedef struct l2arc_buf_hdr {
+	/* protected by arc_buf_hdr mutex */
+	l2arc_dev_t		*b_dev;		/* L2ARC device */
+	uint64_t		b_daddr;	/* disk address, offset byte */
+	/* real alloc'd buffer size depending on b_compress applied */
+	int32_t			b_asize;
+
 	list_node_t		b_l2node;
+} l2arc_buf_hdr_t;
+
+struct arc_buf_hdr {
+	/* protected by hash lock */
+	dva_t			b_dva;
+	uint64_t		b_birth;
+	/*
+	 * Even though this checksum is only set/verified when a buffer is in
+	 * the L1 cache, it needs to be in the set of common fields because it
+	 * must be preserved from the time before a buffer is written out to
+	 * L2ARC until after it is read back in.
+	 */
+	zio_cksum_t		*b_freeze_cksum;
+
+	arc_buf_hdr_t		*b_hash_next;
+	arc_flags_t		b_flags;
+
+	/* immutable */
+	int32_t			b_size;
+	uint64_t		b_spa;
+
+	/* L2ARC fields. Undefined when not in L2ARC. */
+	l2arc_buf_hdr_t		b_l2hdr;
+	/* L1ARC fields. Undefined when in l2arc_only state */
+	l1arc_buf_hdr_t		b_l1hdr;
 };
 
 #ifdef _KERNEL
@@ -667,22 +729,38 @@ static arc_buf_hdr_t arc_eviction_hdr;
 #define	HDR_PREFETCH(hdr)	((hdr)->b_flags & ARC_FLAG_PREFETCH)
 #define	HDR_FREED_IN_READ(hdr)	((hdr)->b_flags & ARC_FLAG_FREED_IN_READ)
 #define	HDR_BUF_AVAILABLE(hdr)	((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE)
-#define	HDR_FREE_IN_PROGRESS(hdr)	\
-	((hdr)->b_flags & ARC_FLAG_FREE_IN_PROGRESS)
+
 #define	HDR_L2CACHE(hdr)	((hdr)->b_flags & ARC_FLAG_L2CACHE)
+#define	HDR_L2COMPRESS(hdr)	((hdr)->b_flags & ARC_FLAG_L2COMPRESS)
 #define	HDR_L2_READING(hdr)	\
-	((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS &&	\
-	    (hdr)->b_l2hdr != NULL)
+	    (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) &&	\
+	    ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
 #define	HDR_L2_WRITING(hdr)	((hdr)->b_flags & ARC_FLAG_L2_WRITING)
 #define	HDR_L2_EVICTED(hdr)	((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
 #define	HDR_L2_WRITE_HEAD(hdr)	((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
 
+#define	HDR_ISTYPE_METADATA(hdr)	\
+	    ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
+#define	HDR_ISTYPE_DATA(hdr)	(!HDR_ISTYPE_METADATA(hdr))
+
+#define	HDR_HAS_L1HDR(hdr)	((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
+#define	HDR_HAS_L2HDR(hdr)	((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
+
+/* For storing compression mode in b_flags */
+#define	HDR_COMPRESS_OFFSET	24
+#define	HDR_COMPRESS_NBITS	7
+
+#define	HDR_GET_COMPRESS(hdr)	((enum zio_compress)BF32_GET(hdr->b_flags, \
+	    HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS))
+#define	HDR_SET_COMPRESS(hdr, cmp) BF32_SET(hdr->b_flags, \
+	    HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS, (cmp))
+
 /*
  * Other sizes
  */
 
-#define	HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
-#define	L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t))
+#define	HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
+#define	HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
 
 /*
  * Hash table routines
@@ -806,7 +884,7 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_onl
 /*
  * L2ARC Internals
  */
-typedef struct l2arc_dev {
+struct l2arc_dev {
 	vdev_t			*l2ad_vdev;	/* vdev */
 	spa_t			*l2ad_spa;	/* spa */
 	uint64_t		l2ad_hand;	/* next write location */
@@ -815,15 +893,15 @@ typedef struct l2arc_dev {
 	uint64_t		l2ad_evict;	/* last addr eviction reached */
 	boolean_t		l2ad_first;	/* first sweep through */
 	boolean_t		l2ad_writing;	/* currently writing */
-	list_t			*l2ad_buflist;	/* buffer list */
+	kmutex_t		l2ad_mtx;	/* lock for buffer list */
+	list_t			l2ad_buflist;	/* buffer list */
 	list_node_t		l2ad_node;	/* device list node */
-} l2arc_dev_t;
+};
 
 static list_t L2ARC_dev_list;			/* device list */
 static list_t *l2arc_dev_list;			/* device list pointer */
 static kmutex_t l2arc_dev_mtx;			/* device list mutex */
 static l2arc_dev_t *l2arc_dev_last;		/* last device used */
-static kmutex_t l2arc_buflist_mtx;		/* mutex for all buflists */
 static list_t L2ARC_free_on_write;		/* free after write buf list */
 static list_t *l2arc_free_on_write;		/* free after write list ptr */
 static kmutex_t l2arc_free_on_write_mtx;	/* mutex for list */
@@ -843,18 +921,6 @@ typedef struct l2arc_write_callback {
 	arc_buf_hdr_t	*l2wcb_head;		/* head of write buflist */
 } l2arc_write_callback_t;
 
-struct l2arc_buf_hdr {
-	/* protected by arc_buf_hdr  mutex */
-	l2arc_dev_t		*b_dev;		/* L2ARC device */
-	uint64_t		b_daddr;	/* disk address, offset byte */
-	/* compression applied to buffer data */
-	enum zio_compress	b_compress;
-	/* real alloc'd buffer size depending on b_compress applied */
-	int			b_asize;
-	/* temporary buffer holder for in-flight compressed data */
-	void			*b_tmp_cdata;
-};
-
 typedef struct l2arc_data_free {
 	/* protected by l2arc_free_on_write_mtx */
 	void		*l2df_data;
@@ -873,12 +939,13 @@ static int arc_evict_needed(arc_buf_cont
 static void arc_evict_ghost(arc_state_t *, uint64_t, int64_t);
 static void arc_buf_watch(arc_buf_t *);
 
+static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
+static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
+
 static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
 static void l2arc_read_done(zio_t *);
-static void l2arc_hdr_stat_add(void);
-static void l2arc_hdr_stat_remove(void);
 
-static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t *);
+static boolean_t l2arc_compress_buf(arc_buf_hdr_t *);
 static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress);
 static void l2arc_release_cdata_buf(arc_buf_hdr_t *);
 
@@ -901,8 +968,7 @@ buf_hash(uint64_t spa, const dva_t *dva,
 
 #define	BUF_EMPTY(buf)						\
 	((buf)->b_dva.dva_word[0] == 0 &&			\
-	(buf)->b_dva.dva_word[1] == 0 &&			\
-	(buf)->b_cksum0 == 0)
+	(buf)->b_dva.dva_word[1] == 0)
 
 #define	BUF_EQUAL(spa, dva, birth, buf)				\
 	((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) &&	\
@@ -915,7 +981,6 @@ buf_discard_identity(arc_buf_hdr_t *hdr)
 	hdr->b_dva.dva_word[0] = 0;
 	hdr->b_dva.dva_word[1] = 0;
 	hdr->b_birth = 0;
-	hdr->b_cksum0 = 0;
 }
 
 static arc_buf_hdr_t *
@@ -945,6 +1010,7 @@ buf_hash_find(uint64_t spa, const blkptr
  * equal to elem in the hash table, then the already existing element
  * will be returned and the new element will not be inserted.
  * Otherwise returns NULL.
+ * If lockp == NULL, the caller is assumed to already hold the hash lock.
  */
 static arc_buf_hdr_t *
 buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
@@ -957,8 +1023,14 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmut
 	ASSERT(!DVA_IS_EMPTY(&hdr->b_dva));
 	ASSERT(hdr->b_birth != 0);
 	ASSERT(!HDR_IN_HASH_TABLE(hdr));
-	*lockp = hash_lock;
-	mutex_enter(hash_lock);
+
+	if (lockp != NULL) {
+		*lockp = hash_lock;
+		mutex_enter(hash_lock);
+	} else {
+		ASSERT(MUTEX_HELD(hash_lock));
+	}
+
 	for (fhdr = buf_hash_table.ht_table[idx], i = 0; fhdr != NULL;
 	    fhdr = fhdr->b_hash_next, i++) {
 		if (BUF_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr))
@@ -1013,7 +1085,8 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
 /*
  * Global data structures and functions for the buf kmem cache.
  */
-static kmem_cache_t *hdr_cache;
+static kmem_cache_t *hdr_full_cache;
+static kmem_cache_t *hdr_l2only_cache;
 static kmem_cache_t *buf_cache;
 
 static void
@@ -1025,7 +1098,8 @@ buf_fini(void)
 	    (buf_hash_table.ht_mask + 1) * sizeof (void *));
 	for (i = 0; i < BUF_LOCKS; i++)
 		mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
-	kmem_cache_destroy(hdr_cache);
+	kmem_cache_destroy(hdr_full_cache);
+	kmem_cache_destroy(hdr_l2only_cache);
 	kmem_cache_destroy(buf_cache);
 }
 
@@ -1035,15 +1109,27 @@ buf_fini(void)
  */
 /* ARGSUSED */
 static int
-hdr_cons(void *vbuf, void *unused, int kmflag)
+hdr_full_cons(void *vbuf, void *unused, int kmflag)
 {
 	arc_buf_hdr_t *hdr = vbuf;
 
-	bzero(hdr, sizeof (arc_buf_hdr_t));
-	refcount_create(&hdr->b_refcnt);
-	cv_init(&hdr->b_cv, NULL, CV_DEFAULT, NULL);
-	mutex_init(&hdr->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
-	arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
+	bzero(hdr, HDR_FULL_SIZE);
+	cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
+	refcount_create(&hdr->b_l1hdr.b_refcnt);
+	mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
+	arc_space_consume(HDR_FULL_SIZE, ARC_SPACE_HDRS);
+
+	return (0);
+}
+
+/* ARGSUSED */
+static int
+hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
+{
+	arc_buf_hdr_t *hdr = vbuf;
+
+	bzero(hdr, HDR_L2ONLY_SIZE);
+	arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
 
 	return (0);
 }
@@ -1067,15 +1153,25 @@ buf_cons(void *vbuf, void *unused, int k
  */
 /* ARGSUSED */
 static void
-hdr_dest(void *vbuf, void *unused)
+hdr_full_dest(void *vbuf, void *unused)
+{
+	arc_buf_hdr_t *hdr = vbuf;
+
+	ASSERT(BUF_EMPTY(hdr));
+	cv_destroy(&hdr->b_l1hdr.b_cv);
+	refcount_destroy(&hdr->b_l1hdr.b_refcnt);
+	mutex_destroy(&hdr->b_l1hdr.b_freeze_lock);
+	arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
+}
+
+/* ARGSUSED */
+static void
+hdr_l2only_dest(void *vbuf, void *unused)
 {
 	arc_buf_hdr_t *hdr = vbuf;
 
 	ASSERT(BUF_EMPTY(hdr));
-	refcount_destroy(&hdr->b_refcnt);
-	cv_destroy(&hdr->b_cv);
-	mutex_destroy(&hdr->b_freeze_lock);
-	arc_space_return(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
+	arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
 }
 
 /* ARGSUSED */
@@ -1129,8 +1225,11 @@ retry:
 		goto retry;
 	}
 
-	hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
-	    0, hdr_cons, hdr_dest, hdr_recl, NULL, NULL, 0);
+	hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
+	    0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0);
+	hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
+	    HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl,
+	    NULL, NULL, 0);
 	buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
 	    0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
 
@@ -1144,6 +1243,81 @@ retry:
 	}
 }
 
+/*
+ * Transition between the two allocation states for the arc_buf_hdr struct.
+ * The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without
+ * (hdr_l2only_cache) the fields necessary for the L1 cache - the smaller
+ * version is used when a cache buffer is only in the L2ARC in order to reduce
+ * memory usage.
+ */
+static arc_buf_hdr_t *
+arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
+{
+	ASSERT(HDR_HAS_L2HDR(hdr));
+
+	arc_buf_hdr_t *nhdr;
+	l2arc_dev_t *dev = hdr->b_l2hdr.b_dev;
+
+	ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
+	    (old == hdr_l2only_cache && new == hdr_full_cache));
+
+	nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
+
+	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+	buf_hash_remove(hdr);
+
+	bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
+	if (new == hdr_full_cache) {
+		nhdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+		/*
+		 * arc_access and arc_change_state need to be aware that a
+		 * header has just come out of L2ARC, so we set its state to
+		 * l2c_only even though it's about to change.
+		 */
+		nhdr->b_l1hdr.b_state = arc_l2c_only;
+	} else {
+		ASSERT(hdr->b_l1hdr.b_buf == NULL);
+		ASSERT0(hdr->b_l1hdr.b_datacnt);
+		ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
+		/*
+		 * We might be removing the L1hdr of a buffer which was just
+		 * written out to L2ARC. If such a buffer is compressed then we
+		 * need to free its b_tmp_cdata before destroying the header.
+		 */
+		if (hdr->b_l1hdr.b_tmp_cdata != NULL &&
+		    HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF)
+			l2arc_release_cdata_buf(hdr);
+		nhdr->b_flags &= ~ARC_FLAG_HAS_L1HDR;
+	}
+	/*
+	 * The header has been reallocated so we need to re-insert it into any
+	 * lists it was on.
+	 */
+	(void) buf_hash_insert(nhdr, NULL);
+
+	ASSERT(list_link_active(&hdr->b_l2hdr.b_l2node));
+
+	mutex_enter(&dev->l2ad_mtx);
+
+	/*
+	 * We must place the realloc'ed header back into the list at
+	 * the same spot. Otherwise, if it's placed earlier in the list,
+	 * l2arc_write_buffers() could find it during the function's
+	 * write phase, and try to write it out to the l2arc.
+	 */
+	list_insert_after(&dev->l2ad_buflist, hdr, nhdr);
+	list_remove(&dev->l2ad_buflist, hdr);
+
+	mutex_exit(&dev->l2ad_mtx);
+
+	buf_discard_identity(hdr);
+	hdr->b_freeze_cksum = NULL;
+	kmem_cache_free(old, hdr);
+
+	return (nhdr);
+}
+
+
 #define	ARC_MINTIME	(hz>>4) /* 62 ms */
 
 static void
@@ -1154,16 +1328,15 @@ arc_cksum_verify(arc_buf_t *buf)
 	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
 		return;
 
-	mutex_enter(&buf->b_hdr->b_freeze_lock);
-	if (buf->b_hdr->b_freeze_cksum == NULL ||
-	    (buf->b_hdr->b_flags & ARC_FLAG_IO_ERROR)) {
-		mutex_exit(&buf->b_hdr->b_freeze_lock);
+	mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+	if (buf->b_hdr->b_freeze_cksum == NULL || HDR_IO_ERROR(buf->b_hdr)) {
+		mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 		return;
 	}
 	fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
 	if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))
 		panic("buffer modified while frozen!");
-	mutex_exit(&buf->b_hdr->b_freeze_lock);
+	mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 }
 
 static int
@@ -1172,10 +1345,10 @@ arc_cksum_equal(arc_buf_t *buf)
 	zio_cksum_t zc;
 	int equal;
 
-	mutex_enter(&buf->b_hdr->b_freeze_lock);
+	mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 	fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
 	equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc);
-	mutex_exit(&buf->b_hdr->b_freeze_lock);
+	mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 
 	return (equal);
 }
@@ -1186,15 +1359,15 @@ arc_cksum_compute(arc_buf_t *buf, boolea
 	if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY))
 		return;
 
-	mutex_enter(&buf->b_hdr->b_freeze_lock);
+	mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 	if (buf->b_hdr->b_freeze_cksum != NULL) {
-		mutex_exit(&buf->b_hdr->b_freeze_lock);
+		mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 		return;
 	}
 	buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP);
 	fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
 	    buf->b_hdr->b_freeze_cksum);
-	mutex_exit(&buf->b_hdr->b_freeze_lock);
+	mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 #ifdef illumos
 	arc_buf_watch(buf);
 #endif
@@ -1245,30 +1418,58 @@ arc_buf_watch(arc_buf_t *buf)
 }
 #endif /* illumos */
 
+static arc_buf_contents_t
+arc_buf_type(arc_buf_hdr_t *hdr)
+{
+	if (HDR_ISTYPE_METADATA(hdr)) {
+		return (ARC_BUFC_METADATA);
+	} else {
+		return (ARC_BUFC_DATA);
+	}
+}
+
+static uint32_t
+arc_bufc_to_flags(arc_buf_contents_t type)
+{
+	switch (type) {
+	case ARC_BUFC_DATA:
+		/* metadata field is 0 if buffer contains normal data */
+		return (0);
+	case ARC_BUFC_METADATA:
+		return (ARC_FLAG_BUFC_METADATA);
+	default:
+		break;
+	}
+	panic("undefined ARC buffer type!");
+	return ((uint32_t)-1);
+}
+
 void
 arc_buf_thaw(arc_buf_t *buf)
 {
 	if (zfs_flags & ZFS_DEBUG_MODIFY) {
-		if (buf->b_hdr->b_state != arc_anon)
+		if (buf->b_hdr->b_l1hdr.b_state != arc_anon)
 			panic("modifying non-anon buffer!");
-		if (buf->b_hdr->b_flags & ARC_FLAG_IO_IN_PROGRESS)
+		if (HDR_IO_IN_PROGRESS(buf->b_hdr))
 			panic("modifying buffer while i/o in progress!");
 		arc_cksum_verify(buf);
 	}
 
-	mutex_enter(&buf->b_hdr->b_freeze_lock);
+	mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 	if (buf->b_hdr->b_freeze_cksum != NULL) {
 		kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
 		buf->b_hdr->b_freeze_cksum = NULL;
 	}
 
+#ifdef ZFS_DEBUG
 	if (zfs_flags & ZFS_DEBUG_MODIFY) {
-		if (buf->b_hdr->b_thawed)
-			kmem_free(buf->b_hdr->b_thawed, 1);
-		buf->b_hdr->b_thawed = kmem_alloc(1, KM_SLEEP);
+		if (buf->b_hdr->b_l1hdr.b_thawed != NULL)
+			kmem_free(buf->b_hdr->b_l1hdr.b_thawed, 1);
+		buf->b_hdr->b_l1hdr.b_thawed = kmem_alloc(1, KM_SLEEP);
 	}
+#endif
 
-	mutex_exit(&buf->b_hdr->b_freeze_lock);
+	mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 
 #ifdef illumos
 	arc_buf_unwatch(buf);
@@ -1287,7 +1488,7 @@ arc_buf_freeze(arc_buf_t *buf)
 	mutex_enter(hash_lock);
 
 	ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
-	    buf->b_hdr->b_state == arc_anon);
+	    buf->b_hdr->b_l1hdr.b_state == arc_anon);
 	arc_cksum_compute(buf, B_FALSE);
 	mutex_exit(hash_lock);
 
@@ -1296,30 +1497,34 @@ arc_buf_freeze(arc_buf_t *buf)
 static void
 add_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
 {
+	ASSERT(HDR_HAS_L1HDR(hdr));
 	ASSERT(MUTEX_HELD(hash_lock));
+	arc_state_t *state = hdr->b_l1hdr.b_state;
+
+	if ((refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) &&
+	    (state != arc_anon)) {
+		/* We don't use the L2-only state list. */
+		if (state != arc_l2c_only) {
+			uint64_t delta = hdr->b_size * hdr->b_l1hdr.b_datacnt;
+			list_t *list = &state->arcs_list[arc_buf_type(hdr)];
+			uint64_t *size = &state->arcs_lsize[arc_buf_type(hdr)];
 
-	if ((refcount_add(&hdr->b_refcnt, tag) == 1) &&
-	    (hdr->b_state != arc_anon)) {
-		uint64_t delta = hdr->b_size * hdr->b_datacnt;
-		list_t *list = &hdr->b_state->arcs_list[hdr->b_type];
-		uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type];
-
-		ASSERT(!MUTEX_HELD(&hdr->b_state->arcs_mtx));
-		mutex_enter(&hdr->b_state->arcs_mtx);
-		ASSERT(list_link_active(&hdr->b_arc_node));
-		list_remove(list, hdr);
-		if (GHOST_STATE(hdr->b_state)) {
-			ASSERT0(hdr->b_datacnt);
-			ASSERT3P(hdr->b_buf, ==, NULL);
-			delta = hdr->b_size;
-		}
-		ASSERT(delta > 0);
-		ASSERT3U(*size, >=, delta);
-		atomic_add_64(size, -delta);
-		mutex_exit(&hdr->b_state->arcs_mtx);
+			ASSERT(!MUTEX_HELD(&state->arcs_mtx));
+			mutex_enter(&state->arcs_mtx);
+			ASSERT(list_link_active(&hdr->b_l1hdr.b_arc_node));
+			list_remove(list, hdr);
+			if (GHOST_STATE(state)) {
+				ASSERT0(hdr->b_l1hdr.b_datacnt);
+				ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+				delta = hdr->b_size;
+			}
+			ASSERT(delta > 0);
+			ASSERT3U(*size, >=, delta);
+			atomic_add_64(size, -delta);
+			mutex_exit(&state->arcs_mtx);
+		}
 		/* remove the prefetch flag if we get a reference */
-		if (hdr->b_flags & ARC_FLAG_PREFETCH)
-			hdr->b_flags &= ~ARC_FLAG_PREFETCH;
+		hdr->b_flags &= ~ARC_FLAG_PREFETCH;
 	}
 }
 
@@ -1327,21 +1532,27 @@ static int
 remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
 {
 	int cnt;
-	arc_state_t *state = hdr->b_state;
+	arc_state_t *state = hdr->b_l1hdr.b_state;
 
+	ASSERT(HDR_HAS_L1HDR(hdr));
 	ASSERT(state == arc_anon || MUTEX_HELD(hash_lock));
 	ASSERT(!GHOST_STATE(state));
 
-	if (((cnt = refcount_remove(&hdr->b_refcnt, tag)) == 0) &&
+	/*
+	 * arc_l2c_only counts as a ghost state so we don't need to explicitly
+	 * check to prevent usage of the arc_l2c_only list.
+	 */
+	if (((cnt = refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
 	    (state != arc_anon)) {
-		uint64_t *size = &state->arcs_lsize[hdr->b_type];
+		uint64_t *size = &state->arcs_lsize[arc_buf_type(hdr)];
 
 		ASSERT(!MUTEX_HELD(&state->arcs_mtx));
 		mutex_enter(&state->arcs_mtx);
-		ASSERT(!list_link_active(&hdr->b_arc_node));
-		list_insert_head(&state->arcs_list[hdr->b_type], hdr);
-		ASSERT(hdr->b_datacnt > 0);
-		atomic_add_64(size, hdr->b_size * hdr->b_datacnt);
+		ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
+		list_insert_head(&state->arcs_list[arc_buf_type(hdr)], hdr);
+		ASSERT(hdr->b_l1hdr.b_datacnt > 0);
+		atomic_add_64(size, hdr->b_size *
+		    hdr->b_l1hdr.b_datacnt);
 		mutex_exit(&state->arcs_mtx);
 	}
 	return (cnt);
@@ -1355,40 +1566,60 @@ static void
 arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
     kmutex_t *hash_lock)
 {
-	arc_state_t *old_state = hdr->b_state;
-	int64_t refcnt = refcount_count(&hdr->b_refcnt);
+	arc_state_t *old_state;
+	int64_t refcnt;
+	uint32_t datacnt;
 	uint64_t from_delta, to_delta;
+	arc_buf_contents_t buftype = arc_buf_type(hdr);
+
+	/*
+	 * We almost always have an L1 hdr here, since we call arc_hdr_realloc()
+	 * in arc_read() when bringing a buffer out of the L2ARC.  However, the
+	 * L1 hdr doesn't always exist when we change state to arc_anon before
+	 * destroying a header, in which case reallocating to add the L1 hdr is
+	 * pointless.
+	 */
+	if (HDR_HAS_L1HDR(hdr)) {
+		old_state = hdr->b_l1hdr.b_state;
+		refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt);
+		datacnt = hdr->b_l1hdr.b_datacnt;
+	} else {
+		old_state = arc_l2c_only;
+		refcnt = 0;
+		datacnt = 0;
+	}
 
 	ASSERT(MUTEX_HELD(hash_lock));
 	ASSERT3P(new_state, !=, old_state);
-	ASSERT(refcnt == 0 || hdr->b_datacnt > 0);
-	ASSERT(hdr->b_datacnt == 0 || !GHOST_STATE(new_state));
-	ASSERT(hdr->b_datacnt <= 1 || old_state != arc_anon);
+	ASSERT(refcnt == 0 || datacnt > 0);
+	ASSERT(!GHOST_STATE(new_state) || datacnt == 0);
+	ASSERT(old_state != arc_anon || datacnt <= 1);
 
-	from_delta = to_delta = hdr->b_datacnt * hdr->b_size;
+	from_delta = to_delta = datacnt * hdr->b_size;
 
 	/*
 	 * If this buffer is evictable, transfer it from the
 	 * old state list to the new state list.
 	 */
 	if (refcnt == 0) {
-		if (old_state != arc_anon) {
+		if (old_state != arc_anon && old_state != arc_l2c_only) {
 			int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx);
-			uint64_t *size = &old_state->arcs_lsize[hdr->b_type];
+			uint64_t *size = &old_state->arcs_lsize[buftype];
 
 			if (use_mutex)
 				mutex_enter(&old_state->arcs_mtx);
 
-			ASSERT(list_link_active(&hdr->b_arc_node));
-			list_remove(&old_state->arcs_list[hdr->b_type], hdr);
+			ASSERT(HDR_HAS_L1HDR(hdr));
+			ASSERT(list_link_active(&hdr->b_l1hdr.b_arc_node));
+			list_remove(&old_state->arcs_list[buftype], hdr);
 
 			/*
 			 * If prefetching out of the ghost cache,
 			 * we will have a non-zero datacnt.
 			 */
-			if (GHOST_STATE(old_state) && hdr->b_datacnt == 0) {
+			if (GHOST_STATE(old_state) && datacnt == 0) {
 				/* ghost elements have a ghost size */
-				ASSERT(hdr->b_buf == NULL);
+				ASSERT(hdr->b_l1hdr.b_buf == NULL);
 				from_delta = hdr->b_size;
 			}
 			ASSERT3U(*size, >=, from_delta);
@@ -1397,20 +1628,26 @@ arc_change_state(arc_state_t *new_state,
 			if (use_mutex)
 				mutex_exit(&old_state->arcs_mtx);
 		}
-		if (new_state != arc_anon) {
+		if (new_state != arc_anon && new_state != arc_l2c_only) {
 			int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx);
-			uint64_t *size = &new_state->arcs_lsize[hdr->b_type];
+			uint64_t *size = &new_state->arcs_lsize[buftype];
 
+			/*
+			 * An L1 header always exists here, since if we're
+			 * moving to some L1-cached state (i.e. not l2c_only or
+			 * anonymous), we realloc the header to add an L1hdr
+			 * beforehand.
+			 */
+			ASSERT(HDR_HAS_L1HDR(hdr));
 			if (use_mutex)
 				mutex_enter(&new_state->arcs_mtx);
 
-			list_insert_head(&new_state->arcs_list[hdr->b_type],
-			    hdr);
+			list_insert_head(&new_state->arcs_list[buftype], hdr);
 
 			/* ghost elements have a ghost size */
 			if (GHOST_STATE(new_state)) {
-				ASSERT(hdr->b_datacnt == 0);
-				ASSERT(hdr->b_buf == NULL);
+				ASSERT0(datacnt);
+				ASSERT(hdr->b_l1hdr.b_buf == NULL);
 				to_delta = hdr->b_size;
 			}
 			atomic_add_64(size, to_delta);
@@ -1424,20 +1661,22 @@ arc_change_state(arc_state_t *new_state,
 	if (new_state == arc_anon && HDR_IN_HASH_TABLE(hdr))
 		buf_hash_remove(hdr);
 
-	/* adjust state sizes */
-	if (to_delta)
+	/* adjust state sizes (ignore arc_l2c_only) */
+	if (to_delta && new_state != arc_l2c_only)
 		atomic_add_64(&new_state->arcs_size, to_delta);
-	if (from_delta) {
+	if (from_delta && old_state != arc_l2c_only) {
 		ASSERT3U(old_state->arcs_size, >=, from_delta);
 		atomic_add_64(&old_state->arcs_size, -from_delta);
 	}
-	hdr->b_state = new_state;
+	if (HDR_HAS_L1HDR(hdr))
+		hdr->b_l1hdr.b_state = new_state;
 
-	/* adjust l2arc hdr stats */
-	if (new_state == arc_l2c_only)
-		l2arc_hdr_stat_add();
-	else if (old_state == arc_l2c_only)
-		l2arc_hdr_stat_remove();
+	/*
+	 * L2 headers should never be on the L2 state list since they don't
+	 * have L1 headers allocated.
+	 */
+	ASSERT(list_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
+	    list_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
 }
 
 void
@@ -1493,31 +1732,36 @@ arc_space_return(uint64_t space, arc_spa
 }
 
 arc_buf_t *
-arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
+arc_buf_alloc(spa_t *spa, int32_t size, void *tag, arc_buf_contents_t type)
 {
 	arc_buf_hdr_t *hdr;
 	arc_buf_t *buf;
 
 	ASSERT3U(size, >, 0);
-	hdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
+	hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
 	ASSERT(BUF_EMPTY(hdr));
+	ASSERT3P(hdr->b_freeze_cksum, ==, NULL);
 	hdr->b_size = size;
-	hdr->b_type = type;
 	hdr->b_spa = spa_load_guid(spa);
-	hdr->b_state = arc_anon;
-	hdr->b_arc_access = 0;
+
 	buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
 	buf->b_hdr = hdr;
 	buf->b_data = NULL;
 	buf->b_efunc = NULL;
 	buf->b_private = NULL;
 	buf->b_next = NULL;
-	hdr->b_buf = buf;
+
+	hdr->b_flags = arc_bufc_to_flags(type);
+	hdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+
+	hdr->b_l1hdr.b_buf = buf;
+	hdr->b_l1hdr.b_state = arc_anon;
+	hdr->b_l1hdr.b_arc_access = 0;
+	hdr->b_l1hdr.b_datacnt = 1;
+
 	arc_get_data_buf(buf);
-	hdr->b_datacnt = 1;
-	hdr->b_flags = 0;
-	ASSERT(refcount_is_zero(&hdr->b_refcnt));
-	(void) refcount_add(&hdr->b_refcnt, tag);
+	ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+	(void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
 
 	return (buf);
 }
@@ -1550,8 +1794,9 @@ arc_return_buf(arc_buf_t *buf, void *tag
 	arc_buf_hdr_t *hdr = buf->b_hdr;
 
 	ASSERT(buf->b_data != NULL);
-	(void) refcount_add(&hdr->b_refcnt, tag);
-	(void) refcount_remove(&hdr->b_refcnt, arc_onloan_tag);
+	ASSERT(HDR_HAS_L1HDR(hdr));
+	(void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
+	(void) refcount_remove(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
 
 	atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
 }
@@ -1560,12 +1805,12 @@ arc_return_buf(arc_buf_t *buf, void *tag
 void
 arc_loan_inuse_buf(arc_buf_t *buf, void *tag)
 {
-	arc_buf_hdr_t *hdr;
+	arc_buf_hdr_t *hdr = buf->b_hdr;
 
 	ASSERT(buf->b_data != NULL);
-	hdr = buf->b_hdr;
-	(void) refcount_add(&hdr->b_refcnt, arc_onloan_tag);
-	(void) refcount_remove(&hdr->b_refcnt, tag);
+	ASSERT(HDR_HAS_L1HDR(hdr));
+	(void) refcount_add(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
+	(void) refcount_remove(&hdr->b_l1hdr.b_refcnt, tag);
 	buf->b_efunc = NULL;
 	buf->b_private = NULL;
 
@@ -1579,15 +1824,16 @@ arc_buf_clone(arc_buf_t *from)
 	arc_buf_hdr_t *hdr = from->b_hdr;
 	uint64_t size = hdr->b_size;
 
-	ASSERT(hdr->b_state != arc_anon);
+	ASSERT(HDR_HAS_L1HDR(hdr));
+	ASSERT(hdr->b_l1hdr.b_state != arc_anon);
 
 	buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
 	buf->b_hdr = hdr;
 	buf->b_data = NULL;
 	buf->b_efunc = NULL;
 	buf->b_private = NULL;
-	buf->b_next = hdr->b_buf;
-	hdr->b_buf = buf;
+	buf->b_next = hdr->b_l1hdr.b_buf;
+	hdr->b_l1hdr.b_buf = buf;
 	arc_get_data_buf(buf);
 	bcopy(from->b_data, buf->b_data, size);
 
@@ -1597,11 +1843,11 @@ arc_buf_clone(arc_buf_t *from)
 	 * then track the size and number of duplicates.  These stats will be
 	 * updated as duplicate buffers are created and destroyed.
 	 */
-	if (hdr->b_type == ARC_BUFC_DATA) {
+	if (HDR_ISTYPE_DATA(hdr)) {
 		ARCSTAT_BUMP(arcstat_duplicate_buffers);
 		ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
 	}
-	hdr->b_datacnt += 1;
+	hdr->b_l1hdr.b_datacnt += 1;
 	return (buf);
 }
 
@@ -1624,17 +1870,20 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
 	hash_lock = HDR_LOCK(buf->b_hdr);
 	mutex_enter(hash_lock);
 	hdr = buf->b_hdr;
+	ASSERT(HDR_HAS_L1HDR(hdr));
 	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
 	mutex_exit(&buf->b_evict_lock);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***