svn commit: r218459 - projects/graid/head/sys/geom/raid
Alexander Motin
mav at FreeBSD.org
Tue Feb 8 22:09:00 UTC 2011
Author: mav
Date: Tue Feb 8 22:09:00 2011
New Revision: 218459
URL: http://svn.freebsd.org/changeset/base/218459
Log:
Fix bad sector recovery:
- Lock region on first recovery read, not ot write. Locking on write is
too late, as some other write can slip in before it.
- Add optional ignore BIO argument to g_raid_lock_range(). It is needed to
not block on request that caused this recovery. This fixes dead lock on
bad sector recovery.
- Prepare some ground to do multiple recovery reads for 3+ disks RAID0s.
Modified:
projects/graid/head/sys/geom/raid/g_raid.c
projects/graid/head/sys/geom/raid/g_raid.h
projects/graid/head/sys/geom/raid/tr_raid1.c
Modified: projects/graid/head/sys/geom/raid/g_raid.c
==============================================================================
--- projects/graid/head/sys/geom/raid/g_raid.c Tue Feb 8 22:08:00 2011 (r218458)
+++ projects/graid/head/sys/geom/raid/g_raid.c Tue Feb 8 22:09:00 2011 (r218459)
@@ -1001,7 +1001,8 @@ g_raid_iodone(struct bio *bp, int error)
}
int
-g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len, void *argp)
+g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
+ struct bio *ignore, void *argp)
{
struct g_raid_softc *sc;
struct g_raid_lock *lp;
@@ -1016,7 +1017,7 @@ g_raid_lock_range(struct g_raid_volume *
lp->l_pending = 0;
TAILQ_FOREACH(bp, &vol->v_inflight.queue, bio_queue) {
- if (g_raid_bio_overlaps(bp, off, len))
+ if (bp != ignore && g_raid_bio_overlaps(bp, off, len))
lp->l_pending++;
}
Modified: projects/graid/head/sys/geom/raid/g_raid.h
==============================================================================
--- projects/graid/head/sys/geom/raid/g_raid.h Tue Feb 8 22:08:00 2011 (r218458)
+++ projects/graid/head/sys/geom/raid/g_raid.h Tue Feb 8 22:09:00 2011 (r218459)
@@ -377,7 +377,8 @@ struct g_raid_subdisk * g_raid_get_subdi
#define G_RAID_DESTROY_HARD 2
int g_raid_destroy(struct g_raid_softc *sc, int how);
int g_raid_event_send(void *arg, int event, int flags);
-int g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len, void *argp);
+int g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
+ struct bio *ignore, void *argp);
int g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len);
g_ctl_req_t g_raid_ctl;
Modified: projects/graid/head/sys/geom/raid/tr_raid1.c
==============================================================================
--- projects/graid/head/sys/geom/raid/tr_raid1.c Tue Feb 8 22:08:00 2011 (r218458)
+++ projects/graid/head/sys/geom/raid/tr_raid1.c Tue Feb 8 22:09:00 2011 (r218459)
@@ -255,7 +255,7 @@ g_raid_tr_raid1_rebuild_some(struct g_ra
trs->trso_flags |= TR_RAID1_F_DOING_SOME;
trs->trso_flags |= TR_RAID1_F_LOCKED;
g_raid_lock_range(sd->sd_volume, /* Lock callback starts I/O */
- bp->bio_offset, bp->bio_length, bp);
+ bp->bio_offset, bp->bio_length, NULL, bp);
}
static void
@@ -737,7 +737,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
/* Lock callback starts I/O */
trs->trso_flags |= TR_RAID1_F_LOCKED;
g_raid_lock_range(sd->sd_volume,
- bp->bio_offset, bp->bio_length, bp);
+ bp->bio_offset, bp->bio_length, NULL, bp);
}
} else if (trs->trso_type == TR_RAID1_RESYNC) {
/*
@@ -749,8 +749,8 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
}
return;
}
- if (bp->bio_error != 0 && bp->bio_cmd == BIO_READ &&
- pbp->bio_children == 1 && bp->bio_cflags == 0) {
+ pbp->bio_inbed++;
+ if (bp->bio_cmd == BIO_READ && bp->bio_error != 0) {
/*
* Read failed on first drive. Retry the read error on
* another disk drive, if available, before erroring out the
@@ -758,9 +758,9 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
*/
vol = tr->tro_volume;
sd->sd_read_errs++;
- G_RAID_LOGREQ(3, bp,
- "Read failure, attempting recovery. %d total read errs",
- sd->sd_read_errs);
+ G_RAID_LOGREQ(0, bp,
+ "Read error (%d), %d read errors total",
+ bp->bio_error, sd->sd_read_errs);
/*
* If there are too many read errors, we move to degraded.
@@ -768,13 +768,18 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
* everything to get it back in sync), or just degrade the
* drive, which kicks off a resync?
*/
- if (sd->sd_read_errs > g_raid1_read_err_thresh)
+ if (sd->sd_read_errs > g_raid1_read_err_thresh) {
g_raid_fail_disk(sd->sd_softc, sd, sd->sd_disk);
+ if (pbp->bio_children == 1)
+ goto remapdone;
+ }
/*
* Find the other disk, and try to do the I/O to it.
*/
for (nsd = NULL, i = 0; i < vol->v_disks_count; i++) {
+ if (pbp->bio_children > 1)
+ break;
nsd = &vol->v_subdisks[i];
if (sd == nsd)
continue;
@@ -784,8 +789,12 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
if (cbp == NULL)
break;
G_RAID_LOGREQ(2, cbp, "Retrying read");
- g_raid_subdisk_iostart(nsd, cbp);
- pbp->bio_inbed++;
+ pbp->bio_driver1 = sd; /* Save original subdisk. */
+ cbp->bio_caller1 = nsd;
+ cbp->bio_cflags = G_RAID_BIO_FLAG_REMAP;
+ /* Lock callback starts I/O */
+ g_raid_lock_range(sd->sd_volume,
+ cbp->bio_offset, cbp->bio_length, pbp, cbp);
return;
}
/*
@@ -796,9 +805,8 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
*/
G_RAID_LOGREQ(2, bp, "Couldn't retry read, failing it");
}
- pbp->bio_inbed++;
- if (pbp->bio_cmd == BIO_READ && pbp->bio_children == 2 &&
- bp->bio_cflags == 0) {
+ if (bp->bio_cmd == BIO_READ && bp->bio_error == 0 &&
+ pbp->bio_children > 1) {
/*
* If it was a read, and bio_children is 2, then we just
* recovered the data from the second drive. We should try to
@@ -813,17 +821,15 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
G_RAID_LOGREQ(3, bp, "Recovered data from other drive");
cbp = g_clone_bio(pbp);
if (cbp != NULL) {
- nsd = bp->bio_caller1;
cbp->bio_cmd = BIO_WRITE;
cbp->bio_cflags = G_RAID_BIO_FLAG_REMAP;
- cbp->bio_caller1 = nsd;
- G_RAID_LOGREQ(3, bp,
+ G_RAID_LOGREQ(3, cbp,
"Attempting bad sector remap on failing drive.");
- /* Lock callback starts I/O */
- g_raid_lock_range(sd->sd_volume,
- cbp->bio_offset, cbp->bio_length, cbp);
+ g_raid_subdisk_iostart(pbp->bio_driver1, cbp);
+ return;
}
}
+remapdone:
if (bp->bio_cflags & G_RAID_BIO_FLAG_REMAP) {
/*
* We're done with a remap write, mark the range as unlocked.
@@ -834,14 +840,15 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
* it now. However, we need to reset error to 0 in that case
* because we're not failing the original I/O which succeeded.
*/
- G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error);
- g_raid_unlock_range(sd->sd_volume, bp->bio_offset,
- bp->bio_length);
- if (bp->bio_error) {
- G_RAID_LOGREQ(3, bp, "Error on remap: mark subdisk bad.");
+ if (pbp->bio_cmd == BIO_WRITE && bp->bio_error) {
+ G_RAID_LOGREQ(0, bp, "Remap write failed: "
+ "failing subdisk.");
g_raid_fail_disk(sd->sd_softc, sd, sd->sd_disk);
bp->bio_error = 0;
}
+ G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error);
+ g_raid_unlock_range(sd->sd_volume, bp->bio_offset,
+ bp->bio_length);
}
error = bp->bio_error;
g_destroy_bio(bp);
More information about the svn-src-projects
mailing list