svn commit: r287722 - in projects/iosched: cddl/contrib/opensolaris/cmd/sgs/tools/common share/man/man4 sys/cam/ctl sys/cam/scsi sys/cddl/contrib/opensolaris/common/avl sys/cddl/contrib/opensolaris...
From: Warner Losh <imp at FreeBSD.org>
Date: Sat Sep 12 20:14:59 UTC 2015
Author: imp
Date: Sat Sep 12 20:14:54 2015
New Revision: 287722
URL: https://svnweb.freebsd.org/changeset/base/287722
Log:
MFC at 287721
Modified:
projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
projects/iosched/share/man/man4/ctl.4
projects/iosched/share/man/man4/geom_fox.4
projects/iosched/sys/cam/ctl/README.ctl.txt
projects/iosched/sys/cam/ctl/ctl.c
projects/iosched/sys/cam/ctl/ctl_cmd_table.c
projects/iosched/sys/cam/ctl/ctl_tpc.c
projects/iosched/sys/cam/ctl/ctl_tpc_local.c
projects/iosched/sys/cam/scsi/scsi_all.c
projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h
projects/iosched/sys/dev/dwc/if_dwc.c
projects/iosched/sys/netinet/sctp.h
projects/iosched/sys/netinet/sctp_auth.c
projects/iosched/sys/netinet/sctp_header.h
projects/iosched/sys/netinet/sctp_indata.c
projects/iosched/sys/netinet/sctp_input.c
projects/iosched/usr.sbin/ctladm/ctladm.8
projects/iosched/usr.sbin/pw/pw_user.c
Directory Properties:
projects/iosched/ (props changed)
projects/iosched/cddl/ (props changed)
projects/iosched/cddl/contrib/opensolaris/ (props changed)
projects/iosched/share/ (props changed)
projects/iosched/share/man/man4/ (props changed)
projects/iosched/sys/ (props changed)
projects/iosched/sys/cddl/contrib/opensolaris/ (props changed)
Modified: projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
==============================================================================
--- projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -132,6 +132,8 @@ typedef struct msg_string {
static msg_string *msg_head;
static msg_string *msg_tail;
+int aok;
+
/*
* message_append() is responsible for both inserting strings into
* the master Str_tbl as well as maintaining a list of the
Modified: projects/iosched/share/man/man4/ctl.4
==============================================================================
--- projects/iosched/share/man/man4/ctl.4 Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/share/man/man4/ctl.4 Sat Sep 12 20:14:54 2015 (r287722)
@@ -1,4 +1,5 @@
.\" Copyright (c) 2013 Edward Tomasz Napierala
+.\" Copyright (c) 2015 Alexander Motin <mav at FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
@@ -23,7 +24,7 @@
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
-.Dd August 9, 2015
+.Dd September 12, 2015
.Dt CTL 4
.Os
.Sh NAME
@@ -80,6 +81,8 @@ Mode sense/select support
.It
Error injection support
.It
+High Availability clustering support with ALUA
+.It
All I/O handled in-kernel, no userland context switch overhead
.El
.Pp
@@ -99,9 +102,57 @@ log commands with errors;
.It 2
log all commands;
.It 4
-log received data for commands except READ/WRITE.
+log data for commands other than READ/WRITE.
.El
Defaults to 0.
+.It Va kern.cam.ctl.ha_id
+Specifies unique position of this node within High Availability cluster.
+Default is 0 -- no HA, 1 and 2 -- HA enabled at specified position.
+.It Va kern.cam.ctl.ha_mode
+Specifies High Availability cluster operation mode:
+.Bl -tag -offset indent -compact
+.It 0
+Active/Standby -- primary node has backend access and processes requests,
+while secondary can only do basic LUN discovery and reservation;
+.It 1
+Active/Active -- both nodes have backend access and process requests,
+while secondary node synchronizes processing with primary one;
+.It 2
+Active/Active -- primary node has backend access and processes requests,
+while secondary node forwards all requests and data to primary one;
+.El
+All above modes require established connection between HA cluster nodes.
+If connection is not configured, secondary node will report Unavailable
+state; if configured but not established -- Transitioning state.
+Defaults to 0.
+.It Va kern.cam.ctl.ha_peer
+String value, specifying method to establish connection to peer HA node.
+Can be "listen IP:port", "connect IP:port" or empty.
+.It Va kern.cam.ctl.ha_link
+Reports present state of connection between HA cluster nodes:
+.Bl -tag -offset indent -compact
+.It 0
+not configured;
+.It 1
+configured but not established;
+.It 2
+established.
+.El
+.It Va kern.cam.ctl.ha_role
+Specifies default role of this node:
+.Bl -tag -offset indent -compact
+.It 0
+primary;
+.It 1
+secondary.
+.El
+This role can be overridden on per-LUN basis using "ha_role" LUN option,
+so that for one LUN one node is primary, while for another -- another.
+Role change from primary to secondary for HA modes 0 and 2 closes backends,
+the opposite change -- opens.
+If there is no primary node (both nodes are secondary, or secondary node has
+no connection to primary one), secondary node(s) report Transitioning state.
+State with two primary nodes is illegal (split brain condition).
.It Va kern.cam.ctl.iscsi.debug
Verbosity level for log messages from the kernel part of iSCSI target.
Set to 0 to disable logging or 1 to warn about potential problems.
@@ -132,5 +183,7 @@ subsystem first appeared in
.Sh AUTHORS
The
.Nm
-subsystem was written by
+subsystem was originally written by
.An Kenneth Merry Aq Mt ken at FreeBSD.org .
+Later work was done by
+.An Alexander Motin Aq Mt mav at FreeBSD.org .
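
As a minimal aside on the kern.cam.ctl.ha_link sysctl documented in the ctl.4
hunk above: the link state can be polled from userland with sysctlbyname(3).
The sketch below is illustrative only and is not part of this changeset; it
assumes the value is exported as a plain int (0 = not configured, 1 =
configured but not established, 2 = established, as the manual page text
states).

	/* Minimal sketch: query the CTL HA link state via sysctlbyname(3). */
	#include <sys/types.h>
	#include <sys/sysctl.h>

	#include <stdio.h>

	int
	main(void)
	{
		int link;
		size_t len = sizeof(link);

		/* Assumes kern.cam.ctl.ha_link is an int-valued sysctl. */
		if (sysctlbyname("kern.cam.ctl.ha_link", &link, &len,
		    NULL, 0) == -1) {
			perror("sysctlbyname");
			return (1);
		}
		printf("CTL HA link state: %d\n", link);
		return (0);
	}
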
Modified: projects/iosched/share/man/man4/geom_fox.4
==============================================================================
--- projects/iosched/share/man/man4/geom_fox.4 Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/share/man/man4/geom_fox.4 Sat Sep 12 20:14:54 2015 (r287722)
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd January 2, 2005
+.Dd September 12, 2015
.Dt GEOM_FOX 4
.Os
.Sh NAME
@@ -46,6 +46,13 @@ module at boot time, place the following
geom_fox_load="YES"
.Ed
.Sh DESCRIPTION
+.Bf -symbolic
+This driver is obsolete.
+Users are advised to use
+.Xr gmultipath 8
+instead.
+.Ef
+.Pp
The intent of the
.Nm
framework is to provide basic multipathing support to access direct
Modified: projects/iosched/sys/cam/ctl/README.ctl.txt
==============================================================================
--- projects/iosched/sys/cam/ctl/README.ctl.txt Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cam/ctl/README.ctl.txt Sat Sep 12 20:14:54 2015 (r287722)
@@ -40,25 +40,24 @@ Features:
- Support for multiple ports
- Support for multiple simultaneous initiators
- Support for multiple simultaneous backing stores
+ - Support for VMWare VAAI: COMPARE AND WRITE, XCOPY, WRITE SAME and
+ UNMAP commands
+ - Support for Microsoft ODX: POPULATE TOKEN/WRITE USING TOKEN, WRITE SAME
+ and UNMAP commands
- Persistent reservation support
- Mode sense/select support
- Error injection support
- - High Availability support
+ - High Availability clustering support with ALUA
- All I/O handled in-kernel, no userland context switch overhead.
Configuring and Running CTL:
===========================
- - After applying the CTL patchset to your tree, build world and install it
- on your target system.
-
- - Add 'device ctl' to your kernel configuration file.
+ - Add 'device ctl' to your kernel configuration file or load the module.
- If you're running with a 8Gb or 4Gb Qlogic FC board, add
- 'options ISP_TARGET_MODE' to your kernel config file. Keep in mind that
- the isp(4) driver can run in target or initiator mode, but not both on
- the same machine. 'device ispfw' or loading the ispfw module is also
- recommended.
+ 'options ISP_TARGET_MODE' to your kernel config file. 'device ispfw' or
+ loading the ispfw module is also recommended.
- Rebuild and install a new kernel.
Modified: projects/iosched/sys/cam/ctl/ctl.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cam/ctl/ctl.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -673,7 +673,10 @@ ctl_isc_ha_link_down(struct ctl_softc *s
mtx_lock(&softc->ctl_lock);
STAILQ_FOREACH(lun, &softc->lun_list, links) {
mtx_lock(&lun->lun_lock);
- lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+ if (lun->flags & CTL_LUN_PEER_SC_PRIMARY) {
+ lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+ ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+ }
mtx_unlock(&lun->lun_lock);
mtx_unlock(&softc->ctl_lock);
@@ -700,8 +703,11 @@ ctl_isc_ua(struct ctl_softc *softc, unio
struct ctl_lun *lun;
uint32_t iid = ctl_get_initindex(&msg->hdr.nexus);
+ mtx_lock(&softc->ctl_lock);
if (msg->hdr.nexus.targ_lun < CTL_MAX_LUNS &&
- (lun = softc->ctl_luns[msg->hdr.nexus.targ_lun]) != NULL) {
+ (lun = softc->ctl_luns[msg->hdr.nexus.targ_mapped_lun]) != NULL) {
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
if (msg->ua.ua_all) {
if (msg->ua.ua_set)
ctl_est_ua_all(lun, iid, msg->ua.ua_type);
@@ -713,7 +719,9 @@ ctl_isc_ua(struct ctl_softc *softc, unio
else
ctl_clr_ua(lun, iid, msg->ua.ua_type);
}
- }
+ mtx_unlock(&lun->lun_lock);
+ } else
+ mtx_unlock(&softc->ctl_lock);
}
static void
@@ -722,58 +730,69 @@ ctl_isc_lun_sync(struct ctl_softc *softc
struct ctl_lun *lun;
struct ctl_ha_msg_lun_pr_key pr_key;
int i, k;
+ ctl_lun_flags oflags;
+ uint32_t targ_lun;
- lun = softc->ctl_luns[msg->hdr.nexus.targ_lun];
- if (lun == NULL) {
- CTL_DEBUG_PRINT(("%s: Unknown LUN %d\n", __func__,
- msg->hdr.nexus.targ_lun));
+ targ_lun = msg->hdr.nexus.targ_mapped_lun;
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ ((lun = softc->ctl_luns[targ_lun]) == NULL)) {
+ mtx_unlock(&softc->ctl_lock);
+ return;
+ }
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+ i = (lun->lun_devid != NULL) ? lun->lun_devid->len : 0;
+ if (msg->lun.lun_devid_len != i || (i > 0 &&
+ memcmp(&msg->lun.data[0], lun->lun_devid->data, i) != 0)) {
+ mtx_unlock(&lun->lun_lock);
+ printf("%s: Received conflicting HA LUN %d\n",
+ __func__, msg->hdr.nexus.targ_lun);
+ return;
} else {
- mtx_lock(&lun->lun_lock);
- i = (lun->lun_devid != NULL) ? lun->lun_devid->len : 0;
- if (msg->lun.lun_devid_len != i || (i > 0 &&
- memcmp(&msg->lun.data[0], lun->lun_devid->data, i) != 0)) {
- mtx_unlock(&lun->lun_lock);
- printf("%s: Received conflicting HA LUN %d\n",
- __func__, msg->hdr.nexus.targ_lun);
- return;
- } else {
- /* Record whether peer is primary. */
- if ((msg->lun.flags & CTL_LUN_PRIMARY_SC) &&
- (msg->lun.flags & CTL_LUN_DISABLED) == 0)
- lun->flags |= CTL_LUN_PEER_SC_PRIMARY;
- else
- lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
-
- /* If peer is primary and we are not -- use data */
- if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
- (lun->flags & CTL_LUN_PEER_SC_PRIMARY)) {
- lun->PRGeneration = msg->lun.pr_generation;
- lun->pr_res_idx = msg->lun.pr_res_idx;
- lun->res_type = msg->lun.pr_res_type;
- lun->pr_key_count = msg->lun.pr_key_count;
- for (k = 0; k < CTL_MAX_INITIATORS; k++)
- ctl_clr_prkey(lun, k);
- for (k = 0; k < msg->lun.pr_key_count; k++) {
- memcpy(&pr_key, &msg->lun.data[i],
- sizeof(pr_key));
- ctl_alloc_prkey(lun, pr_key.pr_iid);
- ctl_set_prkey(lun, pr_key.pr_iid,
- pr_key.pr_key);
- i += sizeof(pr_key);
- }
+ /* Record whether peer is primary. */
+ oflags = lun->flags;
+ if ((msg->lun.flags & CTL_LUN_PRIMARY_SC) &&
+ (msg->lun.flags & CTL_LUN_DISABLED) == 0)
+ lun->flags |= CTL_LUN_PEER_SC_PRIMARY;
+ else
+ lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+ if (oflags != lun->flags)
+ ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+
+ /* If peer is primary and we are not -- use data */
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
+ (lun->flags & CTL_LUN_PEER_SC_PRIMARY)) {
+ lun->PRGeneration = msg->lun.pr_generation;
+ lun->pr_res_idx = msg->lun.pr_res_idx;
+ lun->res_type = msg->lun.pr_res_type;
+ lun->pr_key_count = msg->lun.pr_key_count;
+ for (k = 0; k < CTL_MAX_INITIATORS; k++)
+ ctl_clr_prkey(lun, k);
+ for (k = 0; k < msg->lun.pr_key_count; k++) {
+ memcpy(&pr_key, &msg->lun.data[i],
+ sizeof(pr_key));
+ ctl_alloc_prkey(lun, pr_key.pr_iid);
+ ctl_set_prkey(lun, pr_key.pr_iid,
+ pr_key.pr_key);
+ i += sizeof(pr_key);
}
-
- mtx_unlock(&lun->lun_lock);
- CTL_DEBUG_PRINT(("%s: Known LUN %d, peer is %s\n",
- __func__, msg->hdr.nexus.targ_lun,
- (msg->lun.flags & CTL_LUN_PRIMARY_SC) ?
- "primary" : "secondary"));
-
- /* If we are primary but peer doesn't know -- notify */
- if ((lun->flags & CTL_LUN_PRIMARY_SC) &&
- (msg->lun.flags & CTL_LUN_PEER_SC_PRIMARY) == 0)
- ctl_isc_announce_lun(lun);
}
+
+ mtx_unlock(&lun->lun_lock);
+ CTL_DEBUG_PRINT(("%s: Known LUN %d, peer is %s\n",
+ __func__, msg->hdr.nexus.targ_lun,
+ (msg->lun.flags & CTL_LUN_PRIMARY_SC) ?
+ "primary" : "secondary"));
+
+ /* If we are primary but peer doesn't know -- notify */
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) &&
+ (msg->lun.flags & CTL_LUN_PEER_SC_PRIMARY) == 0)
+ ctl_isc_announce_lun(lun);
}
}
@@ -1730,20 +1749,24 @@ ctl_serialize_other_sc_cmd(struct ctl_sc
softc = control_softc;
targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun;
+ mtx_lock(&softc->ctl_lock);
if ((targ_lun < CTL_MAX_LUNS) &&
((lun = softc->ctl_luns[targ_lun]) != NULL)) {
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
/*
* If the LUN is invalid, pretend that it doesn't exist.
* It will go away as soon as all pending I/O has been
* completed.
*/
- mtx_lock(&lun->lun_lock);
if (lun->flags & CTL_LUN_DISABLED) {
mtx_unlock(&lun->lun_lock);
lun = NULL;
}
- } else
+ } else {
+ mtx_unlock(&softc->ctl_lock);
lun = NULL;
+ }
if (lun == NULL) {
/*
* The other node would not send this request to us unless
@@ -2514,6 +2537,7 @@ ctl_ioctl(struct cdev *dev, u_long cmd,
if (lun == NULL) {
mtx_unlock(&softc->ctl_lock);
sync_info->status = CTL_GS_SYNC_NO_LUN;
+ break;
}
/*
* Get or set the sync interval. We're not bounds checking
@@ -4531,8 +4555,8 @@ ctl_lun_primary(struct ctl_be_lun *be_lu
mtx_lock(&lun->lun_lock);
lun->flags |= CTL_LUN_PRIMARY_SC;
- mtx_unlock(&lun->lun_lock);
ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+ mtx_unlock(&lun->lun_lock);
ctl_isc_announce_lun(lun);
return (0);
}
@@ -4544,8 +4568,8 @@ ctl_lun_secondary(struct ctl_be_lun *be_
mtx_lock(&lun->lun_lock);
lun->flags &= ~CTL_LUN_PRIMARY_SC;
- mtx_unlock(&lun->lun_lock);
ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+ mtx_unlock(&lun->lun_lock);
ctl_isc_announce_lun(lun);
return (0);
}
@@ -8380,13 +8404,23 @@ ctl_hndl_per_res_out_on_other_sc(union c
struct ctl_lun *lun;
struct ctl_softc *softc;
int i;
- uint32_t targ_lun;
+ uint32_t residx, targ_lun;
softc = control_softc;
-
targ_lun = msg->hdr.nexus.targ_mapped_lun;
- lun = softc->ctl_luns[targ_lun];
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ ((lun = softc->ctl_luns[targ_lun]) == NULL)) {
+ mtx_unlock(&softc->ctl_lock);
+ return;
+ }
mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+ residx = ctl_get_initindex(&msg->hdr.nexus);
switch(msg->pr.pr_info.action) {
case CTL_PR_REG_KEY:
ctl_alloc_prkey(lun, msg->pr.pr_info.residx);
@@ -8451,8 +8485,9 @@ ctl_hndl_per_res_out_on_other_sc(union c
if (lun->res_type != SPR_TYPE_EX_AC
&& lun->res_type != SPR_TYPE_WR_EX) {
for (i = softc->init_min; i < softc->init_max; i++)
- if (ctl_get_prkey(lun, i) != 0)
- ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
+ if (i == residx || ctl_get_prkey(lun, i) == 0)
+ continue;
+ ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
}
lun->flags &= ~CTL_LUN_PR_RESERVED;
@@ -11481,13 +11516,24 @@ ctl_i_t_nexus_reset(union ctl_io *io)
struct ctl_lun *lun;
uint32_t initidx;
+ if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
+ union ctl_ha_msg msg_info;
+
+ msg_info.hdr.nexus = io->io_hdr.nexus;
+ msg_info.task.task_action = CTL_TASK_I_T_NEXUS_RESET;
+ msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
+ msg_info.hdr.original_sc = NULL;
+ msg_info.hdr.serializing_sc = NULL;
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.task), M_WAITOK);
+ }
+
initidx = ctl_get_initindex(&io->io_hdr.nexus);
mtx_lock(&softc->ctl_lock);
STAILQ_FOREACH(lun, &softc->lun_list, links) {
mtx_lock(&lun->lun_lock);
ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port,
- io->io_hdr.nexus.initid,
- (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0);
+ io->io_hdr.nexus.initid, 1);
#ifdef CTL_WITH_CA
ctl_clear_mask(lun->have_ca, initidx);
#endif
Modified: projects/iosched/sys/cam/ctl/ctl_cmd_table.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl_cmd_table.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cam/ctl/ctl_cmd_table.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -486,7 +486,7 @@ const struct ctl_cmd_entry ctl_cmd_table
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
- 12, {0x0a, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
+ 12, {0xea, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 0B */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
Modified: projects/iosched/sys/cam/ctl/ctl_tpc.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl_tpc.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cam/ctl/ctl_tpc.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -394,8 +394,7 @@ ctl_inquiry_evpd_tpc(struct ctl_scsiio *
scsi_ulto2b(0, rtfb_ptr->optimal_length_granularity);
scsi_u64to8b(0, rtfb_ptr->maximum_bytes);
scsi_u64to8b(0, rtfb_ptr->optimal_bytes);
- scsi_u64to8b(TPC_MAX_IOCHUNK_SIZE,
- rtfb_ptr->optimal_bytes_to_token_per_segment);
+ scsi_u64to8b(UINT64_MAX, rtfb_ptr->optimal_bytes_to_token_per_segment);
scsi_u64to8b(TPC_MAX_IOCHUNK_SIZE,
rtfb_ptr->optimal_bytes_from_token_per_segment);
@@ -1590,6 +1589,10 @@ ctl_extended_copy_lid1(struct ctl_scsiio
cdb = (struct scsi_extended_copy *)ctsio->cdb;
len = scsi_4btoul(cdb->length);
+ if (len == 0) {
+ ctl_set_success(ctsio);
+ goto done;
+ }
if (len < sizeof(struct scsi_extended_copy_lid1_data) ||
len > sizeof(struct scsi_extended_copy_lid1_data) +
TPC_MAX_LIST + TPC_MAX_INLINE) {
@@ -1620,20 +1623,22 @@ ctl_extended_copy_lid1(struct ctl_scsiio
lencscd = scsi_2btoul(data->cscd_list_length);
lenseg = scsi_4btoul(data->segment_list_length);
leninl = scsi_4btoul(data->inline_data_length);
- if (len < sizeof(struct scsi_extended_copy_lid1_data) +
- lencscd + lenseg + leninl ||
- leninl > TPC_MAX_INLINE) {
- ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0,
- /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0);
- goto done;
- }
if (lencscd > TPC_MAX_CSCDS * sizeof(struct scsi_ec_cscd)) {
ctl_set_sense(ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
/*asc*/ 0x26, /*ascq*/ 0x06, SSD_ELEM_NONE);
goto done;
}
- if (lencscd + lenseg > TPC_MAX_LIST) {
+ if (lenseg > TPC_MAX_SEGS * sizeof(struct scsi_ec_segment)) {
+ ctl_set_sense(ctsio, /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
+ /*asc*/ 0x26, /*ascq*/ 0x08, SSD_ELEM_NONE);
+ goto done;
+ }
+ if (lencscd + lenseg > TPC_MAX_LIST ||
+ leninl > TPC_MAX_INLINE ||
+ len < sizeof(struct scsi_extended_copy_lid1_data) +
+ lencscd + lenseg + leninl) {
ctl_set_param_len_error(ctsio);
goto done;
}
@@ -1717,6 +1722,10 @@ ctl_extended_copy_lid4(struct ctl_scsiio
cdb = (struct scsi_extended_copy *)ctsio->cdb;
len = scsi_4btoul(cdb->length);
+ if (len == 0) {
+ ctl_set_success(ctsio);
+ goto done;
+ }
if (len < sizeof(struct scsi_extended_copy_lid4_data) ||
len > sizeof(struct scsi_extended_copy_lid4_data) +
TPC_MAX_LIST + TPC_MAX_INLINE) {
@@ -1747,20 +1756,22 @@ ctl_extended_copy_lid4(struct ctl_scsiio
lencscd = scsi_2btoul(data->cscd_list_length);
lenseg = scsi_2btoul(data->segment_list_length);
leninl = scsi_2btoul(data->inline_data_length);
- if (len < sizeof(struct scsi_extended_copy_lid4_data) +
- lencscd + lenseg + leninl ||
- leninl > TPC_MAX_INLINE) {
- ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0,
- /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0);
- goto done;
- }
if (lencscd > TPC_MAX_CSCDS * sizeof(struct scsi_ec_cscd)) {
ctl_set_sense(ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
/*asc*/ 0x26, /*ascq*/ 0x06, SSD_ELEM_NONE);
goto done;
}
- if (lencscd + lenseg > TPC_MAX_LIST) {
+ if (lenseg > TPC_MAX_SEGS * sizeof(struct scsi_ec_segment)) {
+ ctl_set_sense(ctsio, /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
+ /*asc*/ 0x26, /*ascq*/ 0x08, SSD_ELEM_NONE);
+ goto done;
+ }
+ if (lencscd + lenseg > TPC_MAX_LIST ||
+ leninl > TPC_MAX_INLINE ||
+ len < sizeof(struct scsi_extended_copy_lid1_data) +
+ lencscd + lenseg + leninl) {
ctl_set_param_len_error(ctsio);
goto done;
}
Modified: projects/iosched/sys/cam/ctl/ctl_tpc_local.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl_tpc_local.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cam/ctl/ctl_tpc_local.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -281,7 +281,8 @@ tpcl_resolve(struct ctl_softc *softc, in
struct ctl_lun *lun;
uint64_t lunid = UINT64_MAX;
- if (cscd->type_code != EC_CSCD_ID)
+ if (cscd->type_code != EC_CSCD_ID ||
+ (cscd->luidt_pdt & EC_LUIDT_MASK) != EC_LUIDT_LUN)
return (lunid);
cscdid = (struct scsi_ec_cscd_id *)cscd;
Modified: projects/iosched/sys/cam/scsi/scsi_all.c
==============================================================================
--- projects/iosched/sys/cam/scsi/scsi_all.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cam/scsi/scsi_all.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -509,7 +509,8 @@ static struct op_table_entry scsi_op_cod
/* 99 */
/* 9A */
/* 9B */
- /* 9C */
+ /* 9C O WRITE ATOMIC(16) */
+ { 0x9C, D, "WRITE ATOMIC(16)" },
/* 9D */
/* XXX KDM ALL for this? op-num.txt defines it for none.. */
/* 9E SERVICE ACTION IN(16) */
Modified: projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -25,6 +25,7 @@
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -635,14 +636,17 @@ avl_add(avl_tree_t *tree, void *new_node
/*
* This is unfortunate. We want to call panic() here, even for
* non-DEBUG kernels. In userland, however, we can't depend on anything
- * in libc or else the rtld build process gets confused. So, all we can
- * do in userland is resort to a normal ASSERT().
+ * in libc or else the rtld build process gets confused.
+ * Thankfully, rtld provides us with its own assfail() so we can use
+ * that here. We use assfail() directly to get a nice error message
+ * in the core - much like what panic() does for crashdumps.
*/
if (avl_find(tree, new_node, &where) != NULL)
#ifdef _KERNEL
panic("avl_find() succeeded inside avl_add()");
#else
- ASSERT(0);
+ (void) assfail("avl_find() succeeded inside avl_add()",
+ __FILE__, __LINE__);
#endif
avl_insert(tree, new_node, where);
}
Modified: projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -213,7 +213,7 @@ static int arc_min_prefetch_lifespan;
int arc_lotsfree_percent = 10;
static int arc_dead;
-extern int zfs_prefetch_disable;
+extern boolean_t zfs_prefetch_disable;
/*
* The arc has filled available memory and has now warmed up.
@@ -582,6 +582,8 @@ typedef struct arc_stats {
kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
+ kstat_named_t arcstat_sync_wait_for_async;
+ kstat_named_t arcstat_demand_hit_predictive_prefetch;
} arc_stats_t;
static arc_stats_t arc_stats = {
@@ -680,7 +682,9 @@ static arc_stats_t arc_stats = {
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
- { "arc_meta_min", KSTAT_DATA_UINT64 }
+ { "arc_meta_min", KSTAT_DATA_UINT64 },
+ { "sync_wait_for_async", KSTAT_DATA_UINT64 },
+ { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
};
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
@@ -844,6 +848,7 @@ typedef struct l2arc_buf_hdr {
uint64_t b_daddr; /* disk address, offset byte */
/* real alloc'd buffer size depending on b_compress applied */
int32_t b_asize;
+ uint8_t b_compress;
list_node_t b_l2node;
} l2arc_buf_hdr_t;
@@ -923,15 +928,6 @@ static arc_buf_hdr_t arc_eviction_hdr;
#define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
#define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
-/* For storing compression mode in b_flags */
-#define HDR_COMPRESS_OFFSET 24
-#define HDR_COMPRESS_NBITS 7
-
-#define HDR_GET_COMPRESS(hdr) ((enum zio_compress)BF32_GET(hdr->b_flags, \
- HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS))
-#define HDR_SET_COMPRESS(hdr, cmp) BF32_SET(hdr->b_flags, \
- HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS, (cmp))
-
/*
* Other sizes
*/
@@ -2222,7 +2218,7 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr
* separately compressed buffer, so there's nothing to free (it
* points to the same buffer as the arc_buf_t's b_data field).
*/
- if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) {
+ if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_OFF) {
hdr->b_l1hdr.b_tmp_cdata = NULL;
return;
}
@@ -2231,12 +2227,12 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr
* There's nothing to free since the buffer was all zero's and
* compressed to a zero length buffer.
*/
- if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_EMPTY) {
+ if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_EMPTY) {
ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
return;
}
- ASSERT(L2ARC_IS_VALID_COMPRESS(HDR_GET_COMPRESS(hdr)));
+ ASSERT(L2ARC_IS_VALID_COMPRESS(hdr->b_l2hdr.b_compress));
arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata,
hdr->b_size, zio_data_buf_free);
@@ -4250,6 +4246,36 @@ top:
if (HDR_IO_IN_PROGRESS(hdr)) {
+ if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
+ priority == ZIO_PRIORITY_SYNC_READ) {
+ /*
+ * This sync read must wait for an
+ * in-progress async read (e.g. a predictive
+ * prefetch). Async reads are queued
+ * separately at the vdev_queue layer, so
+ * this is a form of priority inversion.
+ * Ideally, we would "inherit" the demand
+ * i/o's priority by moving the i/o from
+ * the async queue to the synchronous queue,
+ * but there is currently no mechanism to do
+ * so. Track this so that we can evaluate
+ * the magnitude of this potential performance
+ * problem.
+ *
+ * Note that if the prefetch i/o is already
+ * active (has been issued to the device),
+ * the prefetch improved performance, because
+ * we issued it sooner than we would have
+ * without the prefetch.
+ */
+ DTRACE_PROBE1(arc__sync__wait__for__async,
+ arc_buf_hdr_t *, hdr);
+ ARCSTAT_BUMP(arcstat_sync_wait_for_async);
+ }
+ if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+ hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+ }
+
if (*arc_flags & ARC_FLAG_WAIT) {
cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
mutex_exit(hash_lock);
@@ -4258,7 +4284,7 @@ top:
ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
if (done) {
- arc_callback_t *acb = NULL;
+ arc_callback_t *acb = NULL;
acb = kmem_zalloc(sizeof (arc_callback_t),
KM_SLEEP);
@@ -4283,6 +4309,19 @@ top:
hdr->b_l1hdr.b_state == arc_mfu);
if (done) {
+ if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+ /*
+ * This is a demand read which does not have to
+ * wait for i/o because we did a predictive
+ * prefetch i/o for it, which has completed.
+ */
+ DTRACE_PROBE1(
+ arc__demand__hit__predictive__prefetch,
+ arc_buf_hdr_t *, hdr);
+ ARCSTAT_BUMP(
+ arcstat_demand_hit_predictive_prefetch);
+ hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+ }
add_reference(hdr, hash_lock, private);
/*
* If this block is already in use, create a new
@@ -4345,12 +4384,16 @@ top:
goto top; /* restart the IO request */
}
- /* if this is a prefetch, we don't have a reference */
- if (*arc_flags & ARC_FLAG_PREFETCH) {
+ /*
+ * If there is a callback, we pass our reference to
+ * it; otherwise we remove our reference.
+ */
+ if (done == NULL) {
(void) remove_reference(hdr, hash_lock,
private);
- hdr->b_flags |= ARC_FLAG_PREFETCH;
}
+ if (*arc_flags & ARC_FLAG_PREFETCH)
+ hdr->b_flags |= ARC_FLAG_PREFETCH;
if (*arc_flags & ARC_FLAG_L2CACHE)
hdr->b_flags |= ARC_FLAG_L2CACHE;
if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -4373,11 +4416,13 @@ top:
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- /* if this is a prefetch, we don't have a reference */
+ /*
+ * If there is a callback, we pass a reference to it.
+ */
+ if (done != NULL)
+ add_reference(hdr, hash_lock, private);
if (*arc_flags & ARC_FLAG_PREFETCH)
hdr->b_flags |= ARC_FLAG_PREFETCH;
- else
- add_reference(hdr, hash_lock, private);
if (*arc_flags & ARC_FLAG_L2CACHE)
hdr->b_flags |= ARC_FLAG_L2CACHE;
if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -4395,6 +4440,8 @@ top:
arc_access(hdr, hash_lock);
}
+ if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
+ hdr->b_flags |= ARC_FLAG_PREDICTIVE_PREFETCH;
ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
@@ -4409,7 +4456,7 @@ top:
(vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
devw = hdr->b_l2hdr.b_dev->l2ad_writing;
addr = hdr->b_l2hdr.b_daddr;
- b_compress = HDR_GET_COMPRESS(hdr);
+ b_compress = hdr->b_l2hdr.b_compress;
b_asize = hdr->b_l2hdr.b_asize;
/*
* Lock out device removal.
@@ -4437,6 +4484,11 @@ top:
curthread->td_ru.ru_inblock++;
#endif
+ if (priority == ZIO_PRIORITY_ASYNC_READ)
+ hdr->b_flags |= ARC_FLAG_PRIO_ASYNC_READ;
+ else
+ hdr->b_flags &= ~ARC_FLAG_PRIO_ASYNC_READ;
+
if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
/*
* Read from the L2ARC if the following are true:
@@ -5965,6 +6017,8 @@ l2arc_read_done(zio_t *zio)
if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress);
ASSERT(zio->io_data != NULL);
+ ASSERT3U(zio->io_size, ==, hdr->b_size);
+ ASSERT3U(BP_GET_LSIZE(&cb->l2rcb_bp), ==, hdr->b_size);
/*
* Check this survived the L2ARC journey.
@@ -6001,7 +6055,7 @@ l2arc_read_done(zio_t *zio)
ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
- buf->b_data, zio->io_size, arc_read_done, buf,
+ buf->b_data, hdr->b_size, arc_read_done, buf,
zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
}
}
@@ -6318,7 +6372,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
* can't access without holding the ARC list locks
* (which we want to avoid during compression/writing).
*/
- HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
+ hdr->b_l2hdr.b_compress = ZIO_COMPRESS_OFF;
hdr->b_l2hdr.b_asize = hdr->b_size;
hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
@@ -6520,7 +6574,7 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr)
l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT(HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF);
+ ASSERT3S(l2hdr->b_compress, ==, ZIO_COMPRESS_OFF);
ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
len = l2hdr->b_asize;
@@ -6532,7 +6586,7 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr)
if (csize == 0) {
/* zero block, indicate that there's nothing to write */
zio_data_buf_free(cdata, len);
- HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_EMPTY);
+ l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
l2hdr->b_asize = 0;
hdr->b_l1hdr.b_tmp_cdata = NULL;
ARCSTAT_BUMP(arcstat_l2_compress_zeros);
@@ -6550,7 +6604,7 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr)
bzero((char *)cdata + csize, rounded - csize);
csize = rounded;
}
- HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_LZ4);
+ l2hdr->b_compress = ZIO_COMPRESS_LZ4;
l2hdr->b_asize = csize;
hdr->b_l1hdr.b_tmp_cdata = cdata;
ARCSTAT_BUMP(arcstat_l2_compress_successes);
@@ -6637,7 +6691,8 @@ l2arc_decompress_zio(zio_t *zio, arc_buf
static void
l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
{
- enum zio_compress comp = HDR_GET_COMPRESS(hdr);
+ ASSERT(HDR_HAS_L2HDR(hdr));
+ enum zio_compress comp = hdr->b_l2hdr.b_compress;
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp));
Modified: projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -618,7 +618,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *bu
}
static void
-dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
+dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
dnode_t *dn;
zbookmark_phys_t zb;
@@ -664,7 +664,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
db->db_state = DB_CACHED;
- *flags |= DB_RF_CACHED;
mutex_exit(&db->db_mtx);
return;
}
@@ -687,10 +686,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
(void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
- (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
+ (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb);
- if (aflags & ARC_FLAG_CACHED)
- *flags |= DB_RF_CACHED;
}
int
@@ -723,8 +720,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio
if (db->db_state == DB_CACHED) {
mutex_exit(&db->db_mtx);
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
@@ -733,13 +729,12 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio
if (zio == NULL)
zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
- dbuf_read_impl(db, zio, &flags);
+ dbuf_read_impl(db, zio, flags);
/* dbuf_read_impl has dropped db_mtx for us */
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, flags & DB_RF_CACHED);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
@@ -758,8 +753,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio
*/
mutex_exit(&db->db_mtx);
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
@@ -2059,6 +2053,9 @@ dbuf_prefetch(dnode_t *dn, int64_t level
ASSERT(blkid != DMU_BONUS_BLKID);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+ if (blkid > dn->dn_maxblkid)
+ return;
+
if (dnode_block_freed(dn, blkid))
return;
Modified: projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Sat Sep 12 20:06:22 2015 (r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Sat Sep 12 20:14:54 2015 (r287722)
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
/* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
/* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
@@ -389,7 +389,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus
*/
static int
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
- int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+ boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
@@ -399,15 +399,19 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn,
ASSERT(length <= DMU_MAX_ACCESS);
- dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
- if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
- dbuf_flags |= DB_RF_NOPREFETCH;
+ /*
+ * Note: We directly notify the prefetch code of this read, so that
+ * we can tell it about the multi-block read. dbuf_read() only knows
+ * about the one block it is accessing.
+ */
+ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
+ DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
- P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
+ nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) -
+ P2ALIGN(offset, 1ULL << blkshift)) >> blkshift;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***