svn commit: r218470 - projects/graid/head/sys/geom/raid

Wed Feb 9 04:35:33 UTC 2011

Author: imp
Date: Wed Feb  9 04:35:33 2011
New Revision: 218470
URL: http://svn.freebsd.org/changeset/base/218470

Log:
  When we were failing the subdisk due to too many read errors, we were
  returning the error that caused us to kick that subdisk out rather
  than retrying the read on the other disk.  Flag this condition so that
  we go through all the motions of a remap except the write, and return
  the result of the read from the other disk.
  
  We now fail the disk without EIO being returned.

Modified:
  projects/graid/head/sys/geom/raid/g_raid.c
  projects/graid/head/sys/geom/raid/g_raid.h
  projects/graid/head/sys/geom/raid/tr_raid1.c

Modified: projects/graid/head/sys/geom/raid/g_raid.c
==============================================================================

--- projects/graid/head/sys/geom/raid/g_raid.c	Tue Feb  8 23:23:55 2011	(r218469)
+++ projects/graid/head/sys/geom/raid/g_raid.c	Wed Feb  9 04:35:33 2011	(r218470)
@@ -910,6 +910,7 @@ g_raid_start_request(struct bio *bp)
 	sc = bp->bio_to->geom->softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 	vol = bp->bio_to->private;
+
 	/*
 	 * Check to see if this item is in a locked range.  If so,
 	 * queue it to our locked queue and return.  We'll requeue

Modified: projects/graid/head/sys/geom/raid/g_raid.h
==============================================================================
--- projects/graid/head/sys/geom/raid/g_raid.h	Tue Feb  8 23:23:55 2011	(r218469)
+++ projects/graid/head/sys/geom/raid/g_raid.h	Wed Feb  9 04:35:33 2011	(r218470)
@@ -98,6 +98,8 @@ extern struct g_class g_raid_class;
  *				doing some desirable action such as bad
  *				block remapping after we detect a bad part
  *				of the disk.
+ * G_RAID_BIO_FLAG_FAKE_REMAP	Only doing the reading half of a remap
+ *				operation.
  *
  * and the following meta item:
  * G_RAID_BIO_FLAG_SPECIAL	And of the I/O flags that need to make it
@@ -109,6 +111,7 @@ extern struct g_class g_raid_class;
 #define	G_RAID_BIO_FLAG_REMAP		0x02
 #define G_RAID_BIO_FLAG_SPECIAL \
 		(G_RAID_BIO_FLAG_SYNC|G_RAID_BIO_FLAG_REMAP)
+#define G_RAID_BIO_FLAG_FAKE_REMAP	0x80
 
 struct g_raid_lock {
 	off_t			 l_offset;

Modified: projects/graid/head/sys/geom/raid/tr_raid1.c
==============================================================================
--- projects/graid/head/sys/geom/raid/tr_raid1.c	Tue Feb  8 23:23:55 2011	(r218469)
+++ projects/graid/head/sys/geom/raid/tr_raid1.c	Wed Feb  9 04:35:33 2011	(r218470)
@@ -630,7 +630,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 	struct g_raid_volume *vol;
 	struct bio *pbp;
 	struct g_raid_tr_raid1_object *trs;
-	int i, error;
+	int i, error, do_write;
 
 	trs = (struct g_raid_tr_raid1_object *)tr;
 	pbp = bp->bio_parent;
@@ -768,10 +768,11 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 		 * everything to get it back in sync), or just degrade the
 		 * drive, which kicks off a resync?
 		 */
+		do_write = 1;
 		if (sd->sd_read_errs > g_raid1_read_err_thresh) {
 			g_raid_fail_disk(sd->sd_softc, sd, sd->sd_disk);
 			if (pbp->bio_children == 1)
-				goto remapdone;
+				do_write = 0;
 		}
 
 		/*
@@ -792,6 +793,8 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 			pbp->bio_driver1 = sd; /* Save original subdisk. */
 			cbp->bio_caller1 = nsd;
 			cbp->bio_cflags = G_RAID_BIO_FLAG_REMAP;
+			if (!do_write)
+				cbp->bio_cflags |= G_RAID_BIO_FLAG_FAKE_REMAP;
 			/* Lock callback starts I/O */
 			g_raid_lock_range(sd->sd_volume,
 			    cbp->bio_offset, cbp->bio_length, pbp, cbp);
@@ -805,8 +808,10 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 		 */
 		G_RAID_LOGREQ(2, bp, "Couldn't retry read, failing it");
 	}
-	if (bp->bio_cmd == BIO_READ && bp->bio_error == 0 &&
-	    pbp->bio_children > 1) {
+	if (bp->bio_cmd == BIO_READ &&
+	    bp->bio_error == 0 &&
+	    pbp->bio_children > 1 &&
+	    !(bp->bio_cflags & G_RAID_BIO_FLAG_FAKE_REMAP)) {
 		/*
 		 * If it was a read, and bio_children is 2, then we just
 		 * recovered the data from the second drive.  We should try to
@@ -817,6 +822,11 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 		 * affect the return code of this current read, and can be
 		 * done at our liesure.  However, to make the code simpler, it
 		 * is done syncrhonously.
+		 *
+		 * When the FAKE_REMAP flag is set, we fall through to the
+		 * code below which handles the read without the next
+		 * write so we don't return the error that failed the drive,
+		 * but the results of reading the other disk.
 		 */
 		G_RAID_LOGREQ(3, bp, "Recovered data from other drive");
 		cbp = g_clone_bio(pbp);
@@ -829,7 +839,6 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 			return;
 		}
 	}
-remapdone:
 	if (bp->bio_cflags & G_RAID_BIO_FLAG_REMAP) {
 		/*
 		 * We're done with a remap write, mark the range as unlocked.