svn commit: r235871 - stable/9/sys/geom/raid
Alexander Motin
mav at FreeBSD.org
Thu May 24 01:41:55 UTC 2012
Author: mav
Date: Thu May 24 01:41:54 2012
New Revision: 235871
URL: http://svn.freebsd.org/changeset/base/235871
Log:
MFC r235270:
- Prevent error status leak if write to some of the RAID1/1E volume disks
failed while write to some other succeeded. Instead mark disk as failed.
- Make RAID1E less aggressive in failing disks to avoid volume breakage.
Modified:
stable/9/sys/geom/raid/tr_raid1.c
stable/9/sys/geom/raid/tr_raid1e.c
Directory Properties:
stable/9/sys/ (props changed)
Modified: stable/9/sys/geom/raid/tr_raid1.c
==============================================================================
--- stable/9/sys/geom/raid/tr_raid1.c Thu May 24 01:31:10 2012 (r235870)
+++ stable/9/sys/geom/raid/tr_raid1.c Thu May 24 01:41:54 2012 (r235871)
@@ -893,7 +893,16 @@ rebuild_round_done:
g_raid_unlock_range(sd->sd_volume, bp->bio_offset,
bp->bio_length);
}
- error = bp->bio_error;
+ if (pbp->bio_cmd != BIO_READ) {
+ if (pbp->bio_inbed == 1 || pbp->bio_error != 0)
+ pbp->bio_error = bp->bio_error;
+ if (bp->bio_error != 0) {
+ G_RAID_LOGREQ(0, bp, "Write failed: failing subdisk.");
+ g_raid_tr_raid1_fail_disk(sd->sd_softc, sd, sd->sd_disk);
+ }
+ error = pbp->bio_error;
+ } else
+ error = bp->bio_error;
g_destroy_bio(bp);
if (pbp->bio_children == pbp->bio_inbed) {
pbp->bio_completed = pbp->bio_length;
Modified: stable/9/sys/geom/raid/tr_raid1e.c
==============================================================================
--- stable/9/sys/geom/raid/tr_raid1e.c Thu May 24 01:31:10 2012 (r235870)
+++ stable/9/sys/geom/raid/tr_raid1e.c Thu May 24 01:41:54 2012 (r235871)
@@ -338,6 +338,9 @@ static void
g_raid_tr_raid1e_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd,
struct g_raid_disk *disk)
{
+ struct g_raid_volume *vol;
+
+ vol = sd->sd_volume;
/*
* We don't fail the last disk in the pack, since it still has decent
* data on it and that's better than failing the disk if it is the root
@@ -347,8 +350,12 @@ g_raid_tr_raid1e_fail_disk(struct g_raid
* the volume that has / on it. I can't think of a case where we'd
* want the volume to go away on this kind of event.
*/
- if (g_raid_nsubdisks(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == 1 &&
- g_raid_get_subdisk(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == sd)
+ if ((g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE) +
+ g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) +
+ g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) +
+ g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED) <
+ vol->v_disks_count) &&
+ (sd->sd_state >= G_RAID_SUBDISK_S_UNINITIALIZED))
return;
g_raid_fail_disk(sc, sd, disk);
}
@@ -1113,7 +1120,16 @@ rebuild_round_done:
G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error);
g_raid_unlock_range(sd->sd_volume, virtual, bp->bio_length);
}
- error = bp->bio_error;
+ if (pbp->bio_cmd != BIO_READ) {
+ if (pbp->bio_inbed == 1 || pbp->bio_error != 0)
+ pbp->bio_error = bp->bio_error;
+ if (bp->bio_error != 0) {
+ G_RAID_LOGREQ(0, bp, "Write failed: failing subdisk.");
+ g_raid_tr_raid1e_fail_disk(sd->sd_softc, sd, sd->sd_disk);
+ }
+ error = pbp->bio_error;
+ } else
+ error = bp->bio_error;
g_destroy_bio(bp);
if (pbp->bio_children == pbp->bio_inbed) {
pbp->bio_completed = pbp->bio_length;
More information about the svn-src-stable-9
mailing list