svn commit: r319950 - vendor-sys/illumos/dist/uts/common/fs/zfs
Andriy Gapon
avg at FreeBSD.org
Wed Jun 14 16:44:12 UTC 2017
Author: avg
Date: Wed Jun 14 16:44:10 2017
New Revision: 319950
URL: https://svnweb.freebsd.org/changeset/base/319950
Log:
5220 L2ARC does not support devices that do not provide 512B access
illumos/illumos-gate at 403a8da73c64ff9dfb6230ba045c765a242213fb
https://github.com/illumos/illumos-gate/commit/403a8da73c64ff9dfb6230ba045c765a242213fb
https://www.illumos.org/issues/5220
There are disk devices that have a logical sector size larger than 512B, for
example 4KB. That is, their physical sector size is larger than 512B and they
do not provide emulation for 512B sector sizes. For such devices both the data
offset and the data size must be properly aligned. L2ARC must ensure this
alignment itself because it issues physical I/O.
zio_vdev_io_start() performs the necessary transformation if io_size is not
aligned to vdev_ashift, but that is done only for logical I/O. Something
similar should be done in the L2ARC code:
* a temporary write buffer should be allocated if the original buffer is
not going to be compressed and its size is not aligned
* size of a temporary compression buffer should be ashift aligned
* for reads, if the size of the target buffer is not aligned, and the buffer
is therefore not large enough for the aligned I/O, a temporary read buffer
should be allocated
Reviewed by: George Wilson <george.wilson at delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel at delphix.com>
Reviewed by: Saso Kiselkov <saso.kiselkov at nexenta.com>
Approved by: Dan McDonald <danmcd at joyent.com>
Author: Andriy Gapon <avg at FreeBSD.org>
Modified:
vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c Wed Jun 14 16:42:38 2017 (r319949)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c Wed Jun 14 16:44:10 2017 (r319950)
@@ -1075,6 +1075,7 @@ typedef struct l2arc_read_callback {
blkptr_t l2rcb_bp; /* original blkptr */
zbookmark_phys_t l2rcb_zb; /* original bookmark */
int l2rcb_flags; /* original flags */
+ abd_t *l2rcb_abd; /* temporary buffer */
} l2arc_read_callback_t;
typedef struct l2arc_write_callback {
@@ -5048,6 +5049,8 @@ top:
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
!(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
l2arc_read_callback_t *cb;
+ abd_t *abd;
+ uint64_t asize;
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
@@ -5059,8 +5062,17 @@ top:
cb->l2rcb_zb = *zb;
cb->l2rcb_flags = zio_flags;
+ asize = vdev_psize_to_asize(vd, size);
+ if (asize != size) {
+ abd = abd_alloc_for_io(asize,
+ HDR_ISTYPE_METADATA(hdr));
+ cb->l2rcb_abd = abd;
+ } else {
+ abd = hdr->b_l1hdr.b_pabd;
+ }
+
ASSERT(addr >= VDEV_LABEL_START_SIZE &&
- addr + lsize < vd->vdev_psize -
+ addr + asize <= vd->vdev_psize -
VDEV_LABEL_END_SIZE);
/*
@@ -5072,7 +5084,7 @@ top:
ASSERT3U(HDR_GET_COMPRESS(hdr), !=,
ZIO_COMPRESS_EMPTY);
rzio = zio_read_phys(pio, vd, addr,
- size, hdr->b_l1hdr.b_pabd,
+ asize, abd,
ZIO_CHECKSUM_OFF,
l2arc_read_done, cb, priority,
zio_flags | ZIO_FLAG_DONT_CACHE |
@@ -6566,6 +6578,33 @@ l2arc_read_done(zio_t *zio)
mutex_enter(hash_lock);
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+ /*
+ * If the data was read into a temporary buffer,
+ * move it and free the buffer.
+ */
+ if (cb->l2rcb_abd != NULL) {
+ ASSERT3U(arc_hdr_size(hdr), <, zio->io_size);
+ if (zio->io_error == 0) {
+ abd_copy(hdr->b_l1hdr.b_pabd, cb->l2rcb_abd,
+ arc_hdr_size(hdr));
+ }
+
+ /*
+ * The following must be done regardless of whether
+ * there was an error:
+ * - free the temporary buffer
+ * - point zio to the real ARC buffer
+ * - set zio size accordingly
+ * These are required because zio is either re-used for
+ * an I/O of the block in the case of the error
+ * or the zio is passed to arc_read_done() and it
+ * needs real data.
+ */
+ abd_free(cb->l2rcb_abd);
+ zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
+ zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+ }
+
ASSERT3P(zio->io_abd, !=, NULL);
/*
@@ -6903,23 +6942,34 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint
* Normally the L2ARC can use the hdr's data, but if
* we're sharing data between the hdr and one of its
* bufs, L2ARC needs its own copy of the data so that
- * the ZIO below can't race with the buf consumer. To
- * ensure that this copy will be available for the
+ * the ZIO below can't race with the buf consumer.
+ * Another case where we need to create a copy of the
+ * data is when the buffer size is not device-aligned
+ * and we need to pad the block to make it such.
+ * That also keeps the clock hand suitably aligned.
+ *
+ * To ensure that the copy will be available for the
* lifetime of the ZIO and be cleaned up afterwards, we
* add it to the l2arc_free_on_write queue.
*/
+ uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
+ size);
abd_t *to_write;
- if (!HDR_SHARED_DATA(hdr)) {
+ if (!HDR_SHARED_DATA(hdr) && size == asize) {
to_write = hdr->b_l1hdr.b_pabd;
} else {
- to_write = abd_alloc_for_io(size,
+ to_write = abd_alloc_for_io(asize,
HDR_ISTYPE_METADATA(hdr));
abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
+ if (asize != size) {
+ abd_zero_off(to_write, size,
+ asize - size);
+ }
l2arc_free_abd_on_write(to_write, size,
arc_buf_type(hdr));
}
wzio = zio_write_phys(pio, dev->l2ad_vdev,
- hdr->b_l2hdr.b_daddr, size, to_write,
+ hdr->b_l2hdr.b_daddr, asize, to_write,
ZIO_CHECKSUM_OFF, NULL, hdr,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_CANFAIL, B_FALSE);
@@ -6929,11 +6979,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint
zio_t *, wzio);
write_asize += size;
- /*
- * Keep the clock hand suitably device-aligned.
- */
- uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
- size);
write_psize += asize;
dev->l2ad_hand += asize;
More information about the svn-src-vendor
mailing list