svn commit: r235874 - in stable/9: sbin/geom/class/raid sys/conf sys/geom/raid sys/modules/geom/geom_raid

Alexander Motin mav at FreeBSD.org
Thu May 24 02:34:04 UTC 2012


Author: mav
Date: Thu May 24 02:34:03 2012
New Revision: 235874
URL: http://svn.freebsd.org/changeset/base/235874

Log:
  MFC r234458, r234603, r234610, r234727, r234816, r234848, r234868,
  r234869, r234899, r234940, r234993, r234994, r235071 -c r235076, r235080,
  r235096:
   - Add support for the DDF metadata format, as defined by the SNIA Common
  RAID Disk Data Format Specification v2.0;
   - Add support for reading non-degraded RAID4/5/5E/5EE/5R/6/MDF volumes.
  
  Sponsored by:	iXsystems, Inc.
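
The user-visible piece of this MFC is the new DDF metadata module together
with the -o fmtopt flag added to graid(8) label. A minimal usage sketch (the
volume label "data" and the ada device names are placeholders, not taken from
the commit):

    # Create a DDF RAID1 volume across two disks; -o LE selects the
    # little-endian byte order used by some Adaptec controllers, while
    # the spec-defined big-endian "BE" layout is the default.
    graid label -o LE DDF data RAID1 ada1 ada2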

Added:
  stable/9/sys/geom/raid/md_ddf.c
     - copied, changed from r234848, head/sys/geom/raid/md_ddf.c
  stable/9/sys/geom/raid/md_ddf.h
     - copied unchanged from r234848, head/sys/geom/raid/md_ddf.h
  stable/9/sys/geom/raid/tr_raid5.c
     - copied, changed from r234458, head/sys/geom/raid/tr_raid5.c
Modified:
  stable/9/sbin/geom/class/raid/geom_raid.c
  stable/9/sbin/geom/class/raid/graid.8
  stable/9/sys/conf/files
  stable/9/sys/geom/raid/g_raid.c
  stable/9/sys/geom/raid/g_raid.h
  stable/9/sys/geom/raid/g_raid_ctl.c
  stable/9/sys/geom/raid/g_raid_md_if.m
  stable/9/sys/geom/raid/md_intel.c
  stable/9/sys/geom/raid/md_jmicron.c
  stable/9/sys/geom/raid/md_nvidia.c
  stable/9/sys/geom/raid/md_promise.c
  stable/9/sys/geom/raid/md_sii.c
  stable/9/sys/geom/raid/tr_raid1.c
  stable/9/sys/geom/raid/tr_raid1e.c
  stable/9/sys/modules/geom/geom_raid/Makefile
Directory Properties:
  stable/9/sbin/geom/   (props changed)
  stable/9/sys/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/modules/   (props changed)

Modified: stable/9/sbin/geom/class/raid/geom_raid.c
==============================================================================
--- stable/9/sbin/geom/class/raid/geom_raid.c	Thu May 24 02:24:03 2012	(r235873)
+++ stable/9/sbin/geom/class/raid/geom_raid.c	Thu May 24 02:34:03 2012	(r235874)
@@ -48,11 +48,12 @@ struct g_command class_commands[] = {
 	{ "label", G_FLAG_VERBOSE, NULL,
 	    {
 		{ 'f', "force", NULL, G_TYPE_BOOL },
+		{ 'o', "fmtopt", G_VAL_OPTIONAL, G_TYPE_STRING },
 		{ 'S', "size", G_VAL_OPTIONAL, G_TYPE_NUMBER },
 		{ 's', "strip", G_VAL_OPTIONAL, G_TYPE_NUMBER },
 		G_OPT_SENTINEL
 	    },
-	    "[-fv] [-S size] [-s stripsize] format label level prov ..."
+	    "[-fv] [-o fmtopt] [-S size] [-s stripsize] format label level prov ..."
 	},
 	{ "add", G_FLAG_VERBOSE, NULL,
 	    {

Modified: stable/9/sbin/geom/class/raid/graid.8
==============================================================================
--- stable/9/sbin/geom/class/raid/graid.8	Thu May 24 02:24:03 2012	(r235873)
+++ stable/9/sbin/geom/class/raid/graid.8	Thu May 24 02:34:03 2012	(r235874)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 26, 2011
+.Dd May 6, 2012
 .Dt GRAID 8
 .Os
 .Sh NAME
@@ -34,6 +34,7 @@
 .Nm
 .Cm label
 .Op Fl f
+.Op Fl o Ar fmtopt
 .Op Fl S Ar size
 .Op Fl s Ar strip
 .Ar format
@@ -119,6 +120,8 @@ Additional options include:
 .It Fl f
 Enforce specified configuration creation if it is officially unsupported,
 but technically can be created.
+.It Fl o Ar fmtopt
+Specifies metadata format options.
 .It Fl S Ar size
 Use
 .Ar size
@@ -200,6 +203,23 @@ The GEOM RAID class follows a modular de
 formats to be used.
 Support is currently implemented for the following formats:
 .Bl -tag -width "Intel"
+.It DDF
+The format defined by the SNIA Common RAID Disk Data Format v2.0 specification.
+Used by some Adaptec RAID BIOSes and some hardware RAID controllers.
+Because of the high format flexibility, different implementations support
+different sets of features and have different on-disk metadata layouts.
+To provide compatibility, the GEOM RAID class mimics the capabilities
+of the first detected DDF array.
+Depending on that, it may support a different number of disks per volume,
+volumes per array, partitions per disk, etc.
+The following configurations are supported: RAID0 (2+ disks), RAID1 (2+ disks),
+RAID1E (3+ disks), RAID3 (3+ disks), RAID4 (3+ disks), RAID5 (3+ disks),
+RAID5E (4+ disks), RAID5EE (4+ disks), RAID5R (3+ disks), RAID6 (4+ disks),
+RAIDMDF (4+ disks), RAID10 (4+ disks), SINGLE (1 disk), CONCAT (2+ disks).
+.Pp
+The format supports two options, "BE" and "LE", selecting the big-endian byte
+order defined by the specification (default) or the little-endian byte order
+used by some Adaptec controllers.
 .It Intel
 The format used by Intel RAID BIOS.
 Supports up to two volumes per array.
@@ -241,8 +261,11 @@ own risk: RAID1 (3+ disks), RAID10 (6+ d
 .Sh SUPPORTED RAID LEVELS
 The GEOM RAID class follows a modular design, allowing different RAID levels
 to be used.
-Support for the following RAID levels is currently implemented: RAID0, RAID1,
-RAID1E, RAID10, SINGLE, CONCAT.
+Full support for the following RAID levels is currently implemented:
+RAID0, RAID1, RAID1E, RAID10, SINGLE, CONCAT.
+The following RAID levels are supported as read-only for volumes in optimal
+state (without using redundancy): RAID4, RAID5, RAID5E, RAID5EE, RAID5R,
+RAID6, RAIDMDF.
 .Sh RAID LEVEL MIGRATION
 The GEOM RAID class has no support for RAID level migration, allowed by some
 metadata formats.
@@ -253,6 +276,33 @@ corruption!
 .Sh 2TiB BARRIERS
 Promise metadata format does not support disks above 2TiB.
 NVIDIA metadata format does not support volumes above 2TiB.
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm RAID
+GEOM class.
+.Bl -tag -width indent
+.It Va kern.geom.raid.aggressive_spare : No 0
+Use any disks without metadata, connected to controllers of the vendor
+matching the volume metadata format, as spares.
+Use it with great care to avoid losing data if an unrelated disk is connected!
+.It Va kern.geom.raid.clean_time : No 5
+Mark volume as clean when idle for the specified number of seconds.
+.It Va kern.geom.raid.debug : No 0
+Debug level of the
+.Nm RAID
+GEOM class.
+.It Va kern.geom.raid.idle_threshold : No 1000000
+Time in microseconds to consider a volume idle for rebuild purposes.
+.It Va kern.geom.raid.name_format : No 0
+Provider name format: 0 -- raid/r{num}, 1 -- raid/{label}.
+.It Va kern.geom.raid.read_err_thresh : No 10
+Number of read errors equated to disk failure.
+Write errors are always considered disk failures.
+.It Va kern.geom.raid.start_timeout : No 30
+Time to wait for missing array components on startup.
+.El
 .Sh EXIT STATUS
 Exit status is 0 on success, and non-zero if the command fails.
 .Sh SEE ALSO
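
The SYSCTL VARIABLES section added above documents run-time knobs of the RAID
GEOM class. A short sketch of tuning two of them with sysctl(8); the values
are arbitrary examples, and it is assumed the variables are writable at run
time rather than loader-only tunables:

    # Tolerate more read errors before a disk is marked failed (default 10)
    sysctl kern.geom.raid.read_err_thresh=20
    # Name providers raid/{label} instead of raid/r{num}
    sysctl kern.geom.raid.name_format=1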

Modified: stable/9/sys/conf/files
==============================================================================
--- stable/9/sys/conf/files	Thu May 24 02:24:03 2012	(r235873)
+++ stable/9/sys/conf/files	Thu May 24 02:34:03 2012	(r235874)
@@ -2253,6 +2253,7 @@ geom/raid/g_raid.c		optional geom_raid
 geom/raid/g_raid_ctl.c		optional geom_raid
 geom/raid/g_raid_md_if.m	optional geom_raid
 geom/raid/g_raid_tr_if.m	optional geom_raid
+geom/raid/md_ddf.c		optional geom_raid
 geom/raid/md_intel.c		optional geom_raid
 geom/raid/md_jmicron.c		optional geom_raid
 geom/raid/md_nvidia.c		optional geom_raid
@@ -2262,6 +2263,7 @@ geom/raid/tr_concat.c		optional geom_rai
 geom/raid/tr_raid0.c		optional geom_raid
 geom/raid/tr_raid1.c		optional geom_raid
 geom/raid/tr_raid1e.c		optional geom_raid
+geom/raid/tr_raid5.c		optional geom_raid
 geom/raid3/g_raid3.c		optional geom_raid3
 geom/raid3/g_raid3_ctl.c	optional geom_raid3
 geom/shsec/g_shsec.c		optional geom_shsec

Modified: stable/9/sys/geom/raid/g_raid.c
==============================================================================
--- stable/9/sys/geom/raid/g_raid.c	Thu May 24 02:24:03 2012	(r235873)
+++ stable/9/sys/geom/raid/g_raid.c	Thu May 24 02:34:03 2012	(r235874)
@@ -277,23 +277,87 @@ g_raid_volume_level2str(int level, int q
 	case G_RAID_VOLUME_RL_RAID1:
 		return ("RAID1");
 	case G_RAID_VOLUME_RL_RAID3:
+		if (qual == G_RAID_VOLUME_RLQ_R3P0)
+			return ("RAID3-P0");
+		if (qual == G_RAID_VOLUME_RLQ_R3PN)
+			return ("RAID3-PN");
 		return ("RAID3");
 	case G_RAID_VOLUME_RL_RAID4:
+		if (qual == G_RAID_VOLUME_RLQ_R4P0)
+			return ("RAID4-P0");
+		if (qual == G_RAID_VOLUME_RLQ_R4PN)
+			return ("RAID4-PN");
 		return ("RAID4");
 	case G_RAID_VOLUME_RL_RAID5:
+		if (qual == G_RAID_VOLUME_RLQ_R5RA)
+			return ("RAID5-RA");
+		if (qual == G_RAID_VOLUME_RLQ_R5RS)
+			return ("RAID5-RS");
+		if (qual == G_RAID_VOLUME_RLQ_R5LA)
+			return ("RAID5-LA");
+		if (qual == G_RAID_VOLUME_RLQ_R5LS)
+			return ("RAID5-LS");
 		return ("RAID5");
 	case G_RAID_VOLUME_RL_RAID6:
+		if (qual == G_RAID_VOLUME_RLQ_R6RA)
+			return ("RAID6-RA");
+		if (qual == G_RAID_VOLUME_RLQ_R6RS)
+			return ("RAID6-RS");
+		if (qual == G_RAID_VOLUME_RLQ_R6LA)
+			return ("RAID6-LA");
+		if (qual == G_RAID_VOLUME_RLQ_R6LS)
+			return ("RAID6-LS");
 		return ("RAID6");
+	case G_RAID_VOLUME_RL_RAIDMDF:
+		if (qual == G_RAID_VOLUME_RLQ_RMDFRA)
+			return ("RAIDMDF-RA");
+		if (qual == G_RAID_VOLUME_RLQ_RMDFRS)
+			return ("RAIDMDF-RS");
+		if (qual == G_RAID_VOLUME_RLQ_RMDFLA)
+			return ("RAIDMDF-LA");
+		if (qual == G_RAID_VOLUME_RLQ_RMDFLS)
+			return ("RAIDMDF-LS");
+		return ("RAIDMDF");
 	case G_RAID_VOLUME_RL_RAID1E:
+		if (qual == G_RAID_VOLUME_RLQ_R1EA)
+			return ("RAID1E-A");
+		if (qual == G_RAID_VOLUME_RLQ_R1EO)
+			return ("RAID1E-O");
 		return ("RAID1E");
 	case G_RAID_VOLUME_RL_SINGLE:
 		return ("SINGLE");
 	case G_RAID_VOLUME_RL_CONCAT:
 		return ("CONCAT");
 	case G_RAID_VOLUME_RL_RAID5E:
+		if (qual == G_RAID_VOLUME_RLQ_R5ERA)
+			return ("RAID5E-RA");
+		if (qual == G_RAID_VOLUME_RLQ_R5ERS)
+			return ("RAID5E-RS");
+		if (qual == G_RAID_VOLUME_RLQ_R5ELA)
+			return ("RAID5E-LA");
+		if (qual == G_RAID_VOLUME_RLQ_R5ELS)
+			return ("RAID5E-LS");
 		return ("RAID5E");
 	case G_RAID_VOLUME_RL_RAID5EE:
+		if (qual == G_RAID_VOLUME_RLQ_R5EERA)
+			return ("RAID5EE-RA");
+		if (qual == G_RAID_VOLUME_RLQ_R5EERS)
+			return ("RAID5EE-RS");
+		if (qual == G_RAID_VOLUME_RLQ_R5EELA)
+			return ("RAID5EE-LA");
+		if (qual == G_RAID_VOLUME_RLQ_R5EELS)
+			return ("RAID5EE-LS");
 		return ("RAID5EE");
+	case G_RAID_VOLUME_RL_RAID5R:
+		if (qual == G_RAID_VOLUME_RLQ_R5RRA)
+			return ("RAID5R-RA");
+		if (qual == G_RAID_VOLUME_RLQ_R5RRS)
+			return ("RAID5R-RS");
+		if (qual == G_RAID_VOLUME_RLQ_R5RLA)
+			return ("RAID5R-LA");
+		if (qual == G_RAID_VOLUME_RLQ_R5RLS)
+			return ("RAID5R-LS");
+		return ("RAID5R");
 	default:
 		return ("UNKNOWN");
 	}
@@ -309,26 +373,111 @@ g_raid_volume_str2level(const char *str,
 		*level = G_RAID_VOLUME_RL_RAID0;
 	else if (strcasecmp(str, "RAID1") == 0)
 		*level = G_RAID_VOLUME_RL_RAID1;
-	else if (strcasecmp(str, "RAID3") == 0)
+	else if (strcasecmp(str, "RAID3-P0") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID3;
+		*qual = G_RAID_VOLUME_RLQ_R3P0;
+	} else if (strcasecmp(str, "RAID3-PN") == 0 ||
+		   strcasecmp(str, "RAID3") == 0) {
 		*level = G_RAID_VOLUME_RL_RAID3;
-	else if (strcasecmp(str, "RAID4") == 0)
+		*qual = G_RAID_VOLUME_RLQ_R3PN;
+	} else if (strcasecmp(str, "RAID4-P0") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID4;
+		*qual = G_RAID_VOLUME_RLQ_R4P0;
+	} else if (strcasecmp(str, "RAID4-PN") == 0 ||
+		   strcasecmp(str, "RAID4") == 0) {
 		*level = G_RAID_VOLUME_RL_RAID4;
-	else if (strcasecmp(str, "RAID5") == 0)
+		*qual = G_RAID_VOLUME_RLQ_R4PN;
+	} else if (strcasecmp(str, "RAID5-RA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5;
+		*qual = G_RAID_VOLUME_RLQ_R5RA;
+	} else if (strcasecmp(str, "RAID5-RS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5;
+		*qual = G_RAID_VOLUME_RLQ_R5RS;
+	} else if (strcasecmp(str, "RAID5") == 0 ||
+		   strcasecmp(str, "RAID5-LA") == 0) {
 		*level = G_RAID_VOLUME_RL_RAID5;
-	else if (strcasecmp(str, "RAID6") == 0)
+		*qual = G_RAID_VOLUME_RLQ_R5LA;
+	} else if (strcasecmp(str, "RAID5-LS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5;
+		*qual = G_RAID_VOLUME_RLQ_R5LS;
+	} else if (strcasecmp(str, "RAID6-RA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID6;
+		*qual = G_RAID_VOLUME_RLQ_R6RA;
+	} else if (strcasecmp(str, "RAID6-RS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID6;
+		*qual = G_RAID_VOLUME_RLQ_R6RS;
+	} else if (strcasecmp(str, "RAID6") == 0 ||
+		   strcasecmp(str, "RAID6-LA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID6;
+		*qual = G_RAID_VOLUME_RLQ_R6LA;
+	} else if (strcasecmp(str, "RAID6-LS") == 0) {
 		*level = G_RAID_VOLUME_RL_RAID6;
-	else if (strcasecmp(str, "RAID10") == 0 ||
-		 strcasecmp(str, "RAID1E") == 0)
+		*qual = G_RAID_VOLUME_RLQ_R6LS;
+	} else if (strcasecmp(str, "RAIDMDF-RA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAIDMDF;
+		*qual = G_RAID_VOLUME_RLQ_RMDFRA;
+	} else if (strcasecmp(str, "RAIDMDF-RS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAIDMDF;
+		*qual = G_RAID_VOLUME_RLQ_RMDFRS;
+	} else if (strcasecmp(str, "RAIDMDF") == 0 ||
+		   strcasecmp(str, "RAIDMDF-LA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAIDMDF;
+		*qual = G_RAID_VOLUME_RLQ_RMDFLA;
+	} else if (strcasecmp(str, "RAIDMDF-LS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAIDMDF;
+		*qual = G_RAID_VOLUME_RLQ_RMDFLS;
+	} else if (strcasecmp(str, "RAID10") == 0 ||
+		   strcasecmp(str, "RAID1E") == 0 ||
+		   strcasecmp(str, "RAID1E-A") == 0) {
 		*level = G_RAID_VOLUME_RL_RAID1E;
-	else if (strcasecmp(str, "SINGLE") == 0)
+		*qual = G_RAID_VOLUME_RLQ_R1EA;
+	} else if (strcasecmp(str, "RAID1E-O") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID1E;
+		*qual = G_RAID_VOLUME_RLQ_R1EO;
+	} else if (strcasecmp(str, "SINGLE") == 0)
 		*level = G_RAID_VOLUME_RL_SINGLE;
 	else if (strcasecmp(str, "CONCAT") == 0)
 		*level = G_RAID_VOLUME_RL_CONCAT;
-	else if (strcasecmp(str, "RAID5E") == 0)
+	else if (strcasecmp(str, "RAID5E-RA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5E;
+		*qual = G_RAID_VOLUME_RLQ_R5ERA;
+	} else if (strcasecmp(str, "RAID5E-RS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5E;
+		*qual = G_RAID_VOLUME_RLQ_R5ERS;
+	} else if (strcasecmp(str, "RAID5E") == 0 ||
+		   strcasecmp(str, "RAID5E-LA") == 0) {
 		*level = G_RAID_VOLUME_RL_RAID5E;
-	else if (strcasecmp(str, "RAID5EE") == 0)
+		*qual = G_RAID_VOLUME_RLQ_R5ELA;
+	} else if (strcasecmp(str, "RAID5E-LS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5E;
+		*qual = G_RAID_VOLUME_RLQ_R5ELS;
+	} else if (strcasecmp(str, "RAID5EE-RA") == 0) {
 		*level = G_RAID_VOLUME_RL_RAID5EE;
-	else
+		*qual = G_RAID_VOLUME_RLQ_R5EERA;
+	} else if (strcasecmp(str, "RAID5EE-RS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5EE;
+		*qual = G_RAID_VOLUME_RLQ_R5EERS;
+	} else if (strcasecmp(str, "RAID5EE") == 0 ||
+		   strcasecmp(str, "RAID5EE-LA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5EE;
+		*qual = G_RAID_VOLUME_RLQ_R5EELA;
+	} else if (strcasecmp(str, "RAID5EE-LS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5EE;
+		*qual = G_RAID_VOLUME_RLQ_R5EELS;
+	} else if (strcasecmp(str, "RAID5R-RA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5R;
+		*qual = G_RAID_VOLUME_RLQ_R5RRA;
+	} else if (strcasecmp(str, "RAID5R-RS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5R;
+		*qual = G_RAID_VOLUME_RLQ_R5RRS;
+	} else if (strcasecmp(str, "RAID5R") == 0 ||
+		   strcasecmp(str, "RAID5R-LA") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5R;
+		*qual = G_RAID_VOLUME_RLQ_R5RLA;
+	} else if (strcasecmp(str, "RAID5R-LS") == 0) {
+		*level = G_RAID_VOLUME_RL_RAID5R;
+		*qual = G_RAID_VOLUME_RLQ_R5RLS;
+	} else
 		return (-1);
 	return (0);
 }
@@ -1674,8 +1823,8 @@ g_raid_create_node(struct g_class *mp,
 	sc->sc_flags = 0;
 	TAILQ_INIT(&sc->sc_volumes);
 	TAILQ_INIT(&sc->sc_disks);
-	sx_init(&sc->sc_lock, "gmirror:lock");
-	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
+	sx_init(&sc->sc_lock, "graid:lock");
+	mtx_init(&sc->sc_queue_mtx, "graid:queue", NULL, MTX_DEF);
 	TAILQ_INIT(&sc->sc_events);
 	bioq_init(&sc->sc_queue);
 	gp->softc = sc;
@@ -1707,6 +1856,7 @@ g_raid_create_volume(struct g_raid_softc
 	vol->v_state = G_RAID_VOLUME_S_STARTING;
 	vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
 	vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_UNKNOWN;
+	vol->v_rotate_parity = 1;
 	bioq_init(&vol->v_inflight);
 	bioq_init(&vol->v_locked);
 	LIST_INIT(&vol->v_locks);
@@ -1994,7 +2144,7 @@ g_raid_taste(struct g_class *mp, struct 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	G_RAID_DEBUG(2, "Tasting provider %s.", pp->name);
 
-	gp = g_new_geomf(mp, "mirror:taste");
+	gp = g_new_geomf(mp, "raid:taste");
 	/*
 	 * This orphan function should be never called.
 	 */
@@ -2024,7 +2174,8 @@ g_raid_taste(struct g_class *mp, struct 
 }
 
 int
-g_raid_create_node_format(const char *format, struct g_geom **gp)
+g_raid_create_node_format(const char *format, struct gctl_req *req,
+    struct g_geom **gp)
 {
 	struct g_raid_md_class *class;
 	struct g_raid_md_object *obj;
@@ -2042,7 +2193,7 @@ g_raid_create_node_format(const char *fo
 	obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
 	    M_WAITOK);
 	obj->mdo_class = class;
-	status = G_RAID_MD_CREATE(obj, &g_raid_class, gp);
+	status = G_RAID_MD_CREATE_REQ(obj, &g_raid_class, req, gp);
 	if (status != G_RAID_MD_TASTE_NEW)
 		kobj_delete((kobj_t)obj, M_RAID);
 	return (status);

Modified: stable/9/sys/geom/raid/g_raid.h
==============================================================================
--- stable/9/sys/geom/raid/g_raid.h	Thu May 24 02:24:03 2012	(r235873)
+++ stable/9/sys/geom/raid/g_raid.h	Thu May 24 02:34:03 2012	(r235874)
@@ -219,14 +219,48 @@ struct g_raid_subdisk {
 #define G_RAID_VOLUME_RL_RAID4		0x04
 #define G_RAID_VOLUME_RL_RAID5		0x05
 #define G_RAID_VOLUME_RL_RAID6		0x06
+#define G_RAID_VOLUME_RL_RAIDMDF	0x07
 #define G_RAID_VOLUME_RL_RAID1E		0x11
 #define G_RAID_VOLUME_RL_SINGLE		0x0f
 #define G_RAID_VOLUME_RL_CONCAT		0x1f
 #define G_RAID_VOLUME_RL_RAID5E		0x15
 #define G_RAID_VOLUME_RL_RAID5EE	0x25
+#define G_RAID_VOLUME_RL_RAID5R		0x35
 #define G_RAID_VOLUME_RL_UNKNOWN	0xff
 
 #define G_RAID_VOLUME_RLQ_NONE		0x00
+#define G_RAID_VOLUME_RLQ_R1SM		0x00
+#define G_RAID_VOLUME_RLQ_R1MM		0x01
+#define G_RAID_VOLUME_RLQ_R3P0		0x00
+#define G_RAID_VOLUME_RLQ_R3PN		0x01
+#define G_RAID_VOLUME_RLQ_R4P0		0x00
+#define G_RAID_VOLUME_RLQ_R4PN		0x01
+#define G_RAID_VOLUME_RLQ_R5RA		0x00
+#define G_RAID_VOLUME_RLQ_R5RS		0x01
+#define G_RAID_VOLUME_RLQ_R5LA		0x02
+#define G_RAID_VOLUME_RLQ_R5LS		0x03
+#define G_RAID_VOLUME_RLQ_R6RA		0x00
+#define G_RAID_VOLUME_RLQ_R6RS		0x01
+#define G_RAID_VOLUME_RLQ_R6LA		0x02
+#define G_RAID_VOLUME_RLQ_R6LS		0x03
+#define G_RAID_VOLUME_RLQ_RMDFRA	0x00
+#define G_RAID_VOLUME_RLQ_RMDFRS	0x01
+#define G_RAID_VOLUME_RLQ_RMDFLA	0x02
+#define G_RAID_VOLUME_RLQ_RMDFLS	0x03
+#define G_RAID_VOLUME_RLQ_R1EA		0x00
+#define G_RAID_VOLUME_RLQ_R1EO		0x01
+#define G_RAID_VOLUME_RLQ_R5ERA		0x00
+#define G_RAID_VOLUME_RLQ_R5ERS		0x01
+#define G_RAID_VOLUME_RLQ_R5ELA		0x02
+#define G_RAID_VOLUME_RLQ_R5ELS		0x03
+#define G_RAID_VOLUME_RLQ_R5EERA	0x00
+#define G_RAID_VOLUME_RLQ_R5EERS	0x01
+#define G_RAID_VOLUME_RLQ_R5EELA	0x02
+#define G_RAID_VOLUME_RLQ_R5EELS	0x03
+#define G_RAID_VOLUME_RLQ_R5RRA		0x00
+#define G_RAID_VOLUME_RLQ_R5RRS		0x01
+#define G_RAID_VOLUME_RLQ_R5RLA		0x02
+#define G_RAID_VOLUME_RLQ_R5RLS		0x03
 #define G_RAID_VOLUME_RLQ_UNKNOWN	0xff
 
 struct g_raid_volume;
@@ -244,7 +278,13 @@ struct g_raid_volume {
 	u_int			 v_raid_level;	/* Array RAID level. */
 	u_int			 v_raid_level_qualifier; /* RAID level det. */
 	u_int			 v_disks_count;	/* Number of disks in array. */
+	u_int			 v_mdf_pdisks;	/* Number of parity disks
+						   in RAIDMDF array. */
+	uint16_t		 v_mdf_polynomial; /* Polynomial for RAIDMDF. */
+	uint8_t			 v_mdf_method;	/* Generation method for RAIDMDF. */
 	u_int			 v_strip_size;	/* Array strip size. */
+	u_int			 v_rotate_parity; /* Rotate RAID5R parity
+						   after number of stripes. */
 	u_int			 v_sectorsize;	/* Volume sector size. */
 	off_t			 v_mediasize;	/* Volume media size.  */
 	struct bio_queue_head	 v_inflight;	/* In-flight write requests. */
@@ -348,7 +388,8 @@ const char * g_raid_disk_state2str(int s
 
 struct g_raid_softc * g_raid_create_node(struct g_class *mp,
     const char *name, struct g_raid_md_object *md);
-int g_raid_create_node_format(const char *format, struct g_geom **gp);
+int g_raid_create_node_format(const char *format, struct gctl_req *req,
+    struct g_geom **gp);
 struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc,
     const char *name, int id);
 struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc);

Modified: stable/9/sys/geom/raid/g_raid_ctl.c
==============================================================================
--- stable/9/sys/geom/raid/g_raid_ctl.c	Thu May 24 02:24:03 2012	(r235873)
+++ stable/9/sys/geom/raid/g_raid_ctl.c	Thu May 24 02:34:03 2012	(r235874)
@@ -88,7 +88,7 @@ g_raid_ctl_label(struct gctl_req *req, s
 		gctl_error(req, "No format recieved.");
 		return;
 	}
-	crstatus = g_raid_create_node_format(format, &geom);
+	crstatus = g_raid_create_node_format(format, req, &geom);
 	if (crstatus == G_RAID_MD_TASTE_FAIL) {
 		gctl_error(req, "Failed to create array with format '%s'.",
 		    format);

Modified: stable/9/sys/geom/raid/g_raid_md_if.m
==============================================================================
--- stable/9/sys/geom/raid/g_raid_md_if.m	Thu May 24 02:24:03 2012	(r235873)
+++ stable/9/sys/geom/raid/g_raid_md_if.m	Thu May 24 02:34:03 2012	(r235874)
@@ -49,13 +49,22 @@ HEADER {
 # Default implementations of methods.
 CODE {
 	static int
-	g_raid_md_create_default(struct g_raid_md_object *md)
+	g_raid_md_create_default(struct g_raid_md_object *md,
+	    struct g_class *mp, struct g_geom **gp)
 	{
 
 		return (G_RAID_MD_TASTE_FAIL);
 	}
 
 	static int
+	g_raid_md_create_req_default(struct g_raid_md_object *md,
+	    struct g_class *mp, struct gctl_req *req, struct g_geom **gp)
+	{
+
+		return (G_RAID_MD_CREATE(md, mp, gp));
+	}
+
+	static int
 	g_raid_md_ctl_default(struct g_raid_md_object *md,
 	    struct gctl_req *req)
 	{
@@ -95,6 +104,14 @@ METHOD int create {
 	struct g_geom **gp;
 } DEFAULT g_raid_md_create_default;
 
+# create_req() - create new node from scratch, with request argument.
+METHOD int create_req {
+	struct g_raid_md_object *md;
+	struct g_class *mp;
+	struct gctl_req *req;
+	struct g_geom **gp;
+} DEFAULT g_raid_md_create_req_default;
+
 # taste() - taste disk and, if needed, create new node.
 METHOD int taste {
 	struct g_raid_md_object *md;

Copied and modified: stable/9/sys/geom/raid/md_ddf.c (from r234848, head/sys/geom/raid/md_ddf.c)
==============================================================================
--- head/sys/geom/raid/md_ddf.c	Mon Apr 30 17:53:02 2012	(r234848, copy source)
+++ stable/9/sys/geom/raid/md_ddf.c	Thu May 24 02:34:03 2012	(r235874)
@@ -88,14 +88,15 @@ struct g_raid_md_ddf_pervolume {
 
 struct g_raid_md_ddf_object {
 	struct g_raid_md_object	 mdio_base;
+	u_int			 mdio_bigendian;
 	struct ddf_meta		 mdio_meta;
+	int			 mdio_starting;
 	struct callout		 mdio_start_co;	/* STARTING state timer. */
 	int			 mdio_started;
-	int			 mdio_incomplete;
 	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
 };
 
-static g_raid_md_create_t g_raid_md_create_ddf;
+static g_raid_md_create_req_t g_raid_md_create_req_ddf;
 static g_raid_md_taste_t g_raid_md_taste_ddf;
 static g_raid_md_event_t g_raid_md_event_ddf;
 static g_raid_md_volume_event_t g_raid_md_volume_event_ddf;
@@ -107,7 +108,7 @@ static g_raid_md_free_volume_t g_raid_md
 static g_raid_md_free_t g_raid_md_free_ddf;
 
 static kobj_method_t g_raid_md_ddf_methods[] = {
-	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_ddf),
+	KOBJMETHOD(g_raid_md_create_req,	g_raid_md_create_req_ddf),
 	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_ddf),
 	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_ddf),
 	KOBJMETHOD(g_raid_md_volume_event,	g_raid_md_volume_event_ddf),
@@ -172,6 +173,17 @@ static struct g_raid_md_class g_raid_md_
 #define SET32D(m, f, v)	SET32P((m), &(f), (v))
 #define SET64D(m, f, v)	SET64P((m), &(f), (v))
 
+#define GETCRNUM(m)	(GET32((m), hdr->cr_length) /			\
+	GET16((m), hdr->Configuration_Record_Length))
+
+#define GETVDCPTR(m, n)	((struct ddf_vdc_record *)((uint8_t *)(m)->cr +	\
+	(n) * GET16((m), hdr->Configuration_Record_Length) *		\
+	(m)->sectorsize))
+
+#define GETSAPTR(m, n)	((struct ddf_sa_record *)((uint8_t *)(m)->cr +	\
+	(n) * GET16((m), hdr->Configuration_Record_Length) *		\
+	(m)->sectorsize))
+
 static int
 isff(uint8_t *buf, int size)
 {
@@ -254,7 +266,7 @@ g_raid_md_ddf_print(struct ddf_meta *met
 	    GET16(meta, cdr->Controller_Type.SubVendor_ID),
 	    GET16(meta, cdr->Controller_Type.SubDevice_ID));
 	printf("Product_ID           '%.16s'\n", (char *)&meta->cdr->Product_ID[0]);
-	printf("**** Physical Disk Data ****\n");
+	printf("**** Physical Disk Records ****\n");
 	printf("Populated_PDEs       %u\n", GET16(meta, pdr->Populated_PDEs));
 	printf("Max_PDE_Supported    %u\n", GET16(meta, pdr->Max_PDE_Supported));
 	for (j = 0; j < GET16(meta, pdr->Populated_PDEs); j++) {
@@ -276,7 +288,7 @@ g_raid_md_ddf_print(struct ddf_meta *met
 		printf("Block_Size           %u\n",
 		    GET16(meta, pdr->entry[j].Block_Size));
 	}
-	printf("**** Virtual Disk Data ****\n");
+	printf("**** Virtual Disk Records ****\n");
 	printf("Populated_VDEs       %u\n", GET16(meta, vdr->Populated_VDEs));
 	printf("Max_VDE_Supported    %u\n", GET16(meta, vdr->Max_VDE_Supported));
 	for (j = 0; j < GET16(meta, vdr->Populated_VDEs); j++) {
@@ -287,8 +299,8 @@ g_raid_md_ddf_print(struct ddf_meta *met
 		printf("\n");
 		printf("VD_Number            0x%04x\n",
 		    GET16(meta, vdr->entry[j].VD_Number));
-		printf("VD_Type              0x%02x\n",
-		    GET8(meta, vdr->entry[j].VD_Type));
+		printf("VD_Type              0x%04x\n",
+		    GET16(meta, vdr->entry[j].VD_Type));
 		printf("VD_State             0x%02x\n",
 		    GET8(meta, vdr->entry[j].VD_State));
 		printf("Init_State           0x%02x\n",
@@ -299,11 +311,9 @@ g_raid_md_ddf_print(struct ddf_meta *met
 		    (char *)&meta->vdr->entry[j].VD_Name);
 	}
 	printf("**** Configuration Records ****\n");
-	num = GET32(meta, hdr->cr_length) / GET16(meta, hdr->Configuration_Record_Length);
+	num = GETCRNUM(meta);
 	for (j = 0; j < num; j++) {
-		vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-		    j * GET16(meta, hdr->Configuration_Record_Length) *
-		    meta->sectorsize);
+		vdc = GETVDCPTR(meta, j);
 		val = GET32D(meta, vdc->Signature);
 		switch (val) {
 		case DDF_VDCR_SIGNATURE:
@@ -391,6 +401,7 @@ g_raid_md_ddf_print(struct ddf_meta *met
 				    GET16D(meta, sa->entry[i].Secondary_Element));
 			}
 			break;
+		case 0x00000000:
 		case 0xFFFFFFFF:
 			break;
 		default:
@@ -463,17 +474,16 @@ ddf_meta_find_vdc(struct ddf_meta *meta,
 	struct ddf_vdc_record *vdc;
 	int i, num;
 
-	num = GET32(meta, hdr->cr_length) / GET16(meta, hdr->Configuration_Record_Length);
+	num = GETCRNUM(meta);
 	for (i = 0; i < num; i++) {
-		vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-		    i * GET16(meta, hdr->Configuration_Record_Length) *
-		    meta->sectorsize);
+		vdc = GETVDCPTR(meta, i);
 		if (GUID != NULL) {
 			if (GET32D(meta, vdc->Signature) == DDF_VDCR_SIGNATURE &&
 			    memcmp(vdc->VD_GUID, GUID, 24) == 0)
 				return (vdc);
 		} else
-			if (GET32D(meta, vdc->Signature) == 0xffffffff)
+			if (GET32D(meta, vdc->Signature) == 0xffffffff ||
+			    GET32D(meta, vdc->Signature) == 0)
 				return (vdc);
 	}
 	return (NULL);
@@ -486,11 +496,9 @@ ddf_meta_count_vdc(struct ddf_meta *meta
 	int i, num, cnt;
 
 	cnt = 0;
-	num = GET32(meta, hdr->cr_length) / GET16(meta, hdr->Configuration_Record_Length);
+	num = GETCRNUM(meta);
 	for (i = 0; i < num; i++) {
-		vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-		    i * GET16(meta, hdr->Configuration_Record_Length) *
-		    meta->sectorsize);
+		vdc = GETVDCPTR(meta, i);
 		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
 			continue;
 		if (GUID == NULL || memcmp(vdc->VD_GUID, GUID, 24) == 0)
@@ -526,12 +534,36 @@ ddf_meta_find_disk(struct ddf_vol_meta *
 	return (-1);
 }
 
+static struct ddf_sa_record *
+ddf_meta_find_sa(struct ddf_meta *meta, int create)
+{
+	struct ddf_sa_record *sa;
+	int i, num;
+
+	num = GETCRNUM(meta);
+	for (i = 0; i < num; i++) {
+		sa = GETSAPTR(meta, i);
+		if (GET32D(meta, sa->Signature) == DDF_SA_SIGNATURE)
+			return (sa);
+	}
+	if (create) {
+		for (i = 0; i < num; i++) {
+			sa = GETSAPTR(meta, i);
+			if (GET32D(meta, sa->Signature) == 0xffffffff ||
+			    GET32D(meta, sa->Signature) == 0)
+				return (sa);
+		}
+	}
+	return (NULL);
+}
+
 static void
 ddf_meta_create(struct g_raid_disk *disk, struct ddf_meta *sample)
 {
 	struct timespec ts;
 	struct clocktime ct;
 	struct g_raid_md_ddf_perdisk *pd;
+	struct g_raid_md_ddf_object *mdi;
 	struct ddf_meta *meta;
 	struct ddf_pd_entry *pde;
 	off_t anchorlba;
@@ -542,13 +574,14 @@ ddf_meta_create(struct g_raid_disk *disk
 	if (sample->hdr == NULL)
 		sample = NULL;
 
+	mdi = (struct g_raid_md_ddf_object *)disk->d_softc->sc_md;
 	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
 	meta = &pd->pd_meta;
 	ss = disk->d_consumer->provider->sectorsize;
 	anchorlba = disk->d_consumer->provider->mediasize / ss - 1;
 
 	meta->sectorsize = ss;
-	meta->bigendian = sample ? sample->bigendian : 0;
+	meta->bigendian = sample ? sample->bigendian : mdi->mdio_bigendian;
 	getnanotime(&ts);
 	clock_ts_to_ct(&ts, &ct);
 
@@ -642,9 +675,9 @@ ddf_meta_create(struct g_raid_disk *disk
 	pos += GET32(meta, hdr->Diagnostic_Space_Length);
 	SET32(meta, hdr->Vendor_Specific_Logs,
 	    GET32(meta, hdr->Vendor_Specific_Logs_Length) != 0 ? pos : 0xffffffff);
-	pos += GET32(meta, hdr->Vendor_Specific_Logs_Length);
+	pos += min(GET32(meta, hdr->Vendor_Specific_Logs_Length), 1);
 	SET64(meta, hdr->Primary_Header_LBA,
-	    anchorlba - pos - 16);
+	    anchorlba - pos);
 	SET64(meta, hdr->Secondary_Header_LBA,
 	    0xffffffffffffffffULL);
 	SET64(meta, hdr->WorkSpace_LBA,
@@ -756,7 +789,7 @@ ddf_meta_update(struct ddf_meta *meta, s
 		if (isff(spde->PD_GUID, 24))
 			continue;
 		j = ddf_meta_find_pd(meta, NULL,
-		    src->pdr->entry[i].PD_Reference);
+		    GET32(src, pdr->entry[i].PD_Reference));
 		if (j < 0) {
 			j = ddf_meta_find_pd(meta, NULL, 0xffffffff);
 			pde = &meta->pdr->entry[j];
@@ -835,7 +868,8 @@ ddf_vol_meta_create(struct ddf_vol_meta 
 }
 
 static void
-ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src, uint8_t *GUID)
+ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src,
+    uint8_t *GUID, int started)
 {
 	struct ddf_header *hdr;
 	struct ddf_vd_entry *vde;
@@ -850,15 +884,15 @@ ddf_vol_meta_update(struct ddf_vol_meta 
 	size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize;
 
 	if (dst->vdc == NULL ||
-	    ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-	    GET32(dst, vdc->Sequence_Number))) > 0)
+	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+	    GET32(dst, vdc->Sequence_Number))) > 0))
 		vnew = 1;
 	else
 		vnew = 0;
 
 	if (dst->bvdc[bvd] == NULL ||
-	    ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-	    GET32(dst, bvdc[bvd]->Sequence_Number))) > 0)
+	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+	    GET32(dst, bvdc[bvd]->Sequence_Number))) > 0))
 		bvnew = 1;
 	else
 		bvnew = 0;
@@ -936,12 +970,9 @@ ddf_meta_unused_range(struct ddf_meta *m
 	beg[0] = 0;
 	end[0] = GET64(meta, pdr->entry[pos].Configured_Size);
 	n = 1;
-	num = GET32(meta, hdr->cr_length) /
-	    GET16(meta, hdr->Configuration_Record_Length);
+	num = GETCRNUM(meta);
 	for (i = 0; i < num; i++) {
-		vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-		    i * GET16(meta, hdr->Configuration_Record_Length) *
-		    meta->sectorsize);
+		vdc = GETVDCPTR(meta, i);
 		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
 			continue;
 		for (pos = 0; pos < GET16D(meta, vdc->Primary_Element_Count); pos++)
@@ -1197,7 +1228,7 @@ hdrerror:
 	}
 
 done:
-	free(abuf, M_MD_DDF);
+	g_free(abuf);
 	if (error != 0)
 		ddf_meta_free(meta);
 	return (error);
@@ -1260,11 +1291,10 @@ err:
 	if (error != 0)
 		goto err;
 
-	size = GET16(meta, hdr->Configuration_Record_Length);
-	num = GET32(meta, hdr->cr_length) / size;
-	size *= ss;
+	size = GET16(meta, hdr->Configuration_Record_Length) * ss;
+	num = GETCRNUM(meta);
 	for (i = 0; i < num; i++) {
-		vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr + i * size);
+		vdc = GETVDCPTR(meta, i);
 		SET32D(meta, vdc->CRC, 0xffffffff);
 		SET32D(meta, vdc->CRC, crc32(vdc, size));
 	}
@@ -1320,29 +1350,6 @@ ddf_meta_erase(struct g_consumer *cp)
 	return (error);
 }
 
-#if 0
-static int
-ddf_meta_write_spare(struct g_consumer *cp)
-{
-	struct ddf_header *meta;
-	int error;
-
-	meta = malloc(sizeof(*meta), M_MD_DDF, M_WAITOK | M_ZERO);
-	memcpy(&meta->ddf_id[0], DDF_MAGIC, sizeof(DDF_MAGIC) - 1);
-	meta->dummy_0 = 0x00020000;
-	meta->integrity = DDF_I_VALID;
-	meta->disk.flags = DDF_F_SPARE | DDF_F_ONLINE | DDF_F_VALID;
-	meta->disk.number = 0xff;
-	arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0);
-	meta->disk_sectors = cp->provider->mediasize / cp->provider->sectorsize;
-	meta->disk_sectors -= 131072;
-	meta->rebuild_lba = UINT32_MAX;
-	error = ddf_meta_write(cp, &meta, 1);
-	free(meta, M_MD_DDF);
-	return (error);
-}
-#endif
-
 static struct g_raid_volume *
 g_raid_md_ddf_get_volume(struct g_raid_softc *sc, uint8_t *GUID)
 {
@@ -1510,16 +1517,14 @@ g_raid_md_ddf_supported(int level, int q
 		    qual != G_RAID_VOLUME_RLQ_RMDFLA &&
 		    qual != G_RAID_VOLUME_RLQ_RMDFLS)
 			return (0);
-		if (disks < 5)
+		if (disks < 4)
 			return (0);
 		break;
 	case G_RAID_VOLUME_RL_RAID1E:
 		if (qual != G_RAID_VOLUME_RLQ_R1EA &&
 		    qual != G_RAID_VOLUME_RLQ_R1EO)
 			return (0);
-		if (disks < 2)
-			return (0);
-		if (disks % 2 != 0)
+		if (disks < 3)
 			return (0);
 		break;
 	case G_RAID_VOLUME_RL_SINGLE:
@@ -1578,6 +1583,7 @@ g_raid_md_ddf_start_disk(struct g_raid_d
 	struct ddf_vol_meta *vmeta;
 	struct ddf_meta *pdmeta, *gmeta;
 	struct ddf_vdc_record *vdc1;
+	struct ddf_sa_record *sa;
 	off_t size, eoff = 0, esize = 0;
 	uint64_t *val2;
 	int disk_pos, md_disk_bvd = -1, md_disk_pos = -1, md_pde_pos;
@@ -1600,7 +1606,8 @@ g_raid_md_ddf_start_disk(struct g_raid_d
 	md_pde_pos = ddf_meta_find_pd(gmeta, NULL, reference);
 
 	if (disk_pos < 0) {
-		G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s",
+		G_RAID_DEBUG1(1, sc,
+		    "Disk %s is not a present part of the volume %s",
 		    g_raid_get_diskname(disk), vol->v_name);
 
 		/* Failed stale disk is useless for us. */
@@ -1610,10 +1617,8 @@ g_raid_md_ddf_start_disk(struct g_raid_d
 		}
 
 		/* If disk has some metadata for this volume - erase. */
-		if (pdmeta->cr != NULL &&
-		    (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
+		if ((vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL)
 			SET32D(pdmeta, vdc1->Signature, 0xffffffff);
-		}
 
 		/* If we are in the start process, that's all for now. */
 		if (!pv->pv_started)
@@ -1634,6 +1639,8 @@ g_raid_md_ddf_start_disk(struct g_raid_d
 			    g_raid_get_diskname(disk));
 			goto nofit;
 		}
+		eoff *= pd->pd_meta.sectorsize;
+		esize *= pd->pd_meta.sectorsize;
 		size = INT64_MAX;
 		for (i = 0; i < vol->v_disks_count; i++) {
 			sd = &vol->v_subdisks[i];
@@ -1646,26 +1653,41 @@ g_raid_md_ddf_start_disk(struct g_raid_d
 		}
 		if (disk_pos >= 0 &&
 		    vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT &&
-		    (off_t)esize * 512 < size) {
+		    esize < size) {
 			G_RAID_DEBUG1(1, sc, "Disk %s free space "
 			    "is too small (%ju < %ju)",
-			    g_raid_get_diskname(disk),
-			    (off_t)esize * 512, size);
+			    g_raid_get_diskname(disk), esize, size);
 			disk_pos = -1;
 		}
 		if (disk_pos >= 0) {
 			if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT)
-				esize = size / 512;
+				esize = size;
 			md_disk_bvd = disk_pos / GET16(vmeta, vdc->Primary_Element_Count); // XXX
 			md_disk_pos = disk_pos % GET16(vmeta, vdc->Primary_Element_Count); // XXX
 		} else {
 nofit:
-			if (ddf_meta_count_vdc(&pd->pd_meta, NULL) == 0) {
+			if (disk->d_state == G_RAID_DISK_S_NONE)
 				g_raid_change_disk_state(disk,
-				    G_RAID_DISK_S_SPARE);
-			}
+				    G_RAID_DISK_S_STALE);
 			return (0);
 		}
+
+		/*
+		 * If spare is committable, delete spare record.
+		 * Othersize, mark it active and leave there.
+		 * Otherwise, mark it active and leave it there.
+		sa = ddf_meta_find_sa(&pd->pd_meta, 0);
+		if (sa != NULL) {
+			if ((GET8D(&pd->pd_meta, sa->Spare_Type) &
+			    DDF_SAR_TYPE_REVERTIBLE) == 0) {
+				SET32D(&pd->pd_meta, sa->Signature, 0xffffffff);
+			} else {
+				SET8D(&pd->pd_meta, sa->Spare_Type,
+				    GET8D(&pd->pd_meta, sa->Spare_Type) |
+				    DDF_SAR_TYPE_ACTIVE);
+			}
+		}
+
 		G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s",
 		    g_raid_get_diskname(disk), disk_pos, vol->v_name);
 		resurrection = 1;
@@ -1691,8 +1713,8 @@ nofit:
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
 
 	if (resurrection) {
-		sd->sd_offset = (off_t)eoff * 512;
-		sd->sd_size = (off_t)esize * 512;
+		sd->sd_offset = eoff;
+		sd->sd_size = esize;
 	} else if (pdmeta->cr != NULL &&
 	    (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
 		val2 = (uint64_t *)&(vdc1->Physical_Disk_Sequence[GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
@@ -1802,7 +1824,9 @@ g_raid_md_ddf_start(struct g_raid_volume
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk;
 	struct g_raid_md_object *md;
+	struct g_raid_md_ddf_perdisk *pd;
 	struct g_raid_md_ddf_pervolume *pv;
+	struct g_raid_md_ddf_object *mdi;
 	struct ddf_vol_meta *vmeta;
 	struct ddf_vdc_record *vdc;
 	uint64_t *val2;
@@ -1810,6 +1834,7 @@ g_raid_md_ddf_start(struct g_raid_volume
 
 	sc = vol->v_softc;
 	md = sc->sc_md;
+	mdi = (struct g_raid_md_ddf_object *)md;
 	pv = vol->v_md_data;
 	vmeta = &pv->pv_meta;
 	vdc = vmeta->vdc;
@@ -1826,6 +1851,13 @@ g_raid_md_ddf_start(struct g_raid_volume
 	vol->v_strip_size = vol->v_sectorsize << GET8(vmeta, vdc->Stripe_Size);
 	vol->v_disks_count = GET16(vmeta, vdc->Primary_Element_Count) *
 	    GET8(vmeta, vdc->Secondary_Element_Count);
+	vol->v_mdf_pdisks = GET8(vmeta, vdc->MDF_Parity_Disks);
+	vol->v_mdf_polynomial = GET16(vmeta, vdc->MDF_Parity_Generator_Polynomial);
+	vol->v_mdf_method = GET8(vmeta, vdc->MDF_Constant_Generation_Method);
+	if (GET8(vmeta, vdc->Rotate_Parity_count) > 31)
+		vol->v_rotate_parity = 1;
+	else
+		vol->v_rotate_parity = 1 << GET8(vmeta, vdc->Rotate_Parity_count);
 	vol->v_mediasize = GET64(vmeta, vdc->VD_Size) * vol->v_sectorsize;
 	for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
 		if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
@@ -1848,20 +1880,14 @@ g_raid_md_ddf_start(struct g_raid_volume
 	g_raid_start_volume(vol);
 
 	/* Make all disks found till the moment take their places. */
-	for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
-		if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
-			j = 0;
-			bvd++;
-		}
-		if (vmeta->bvdc[bvd] == NULL)
-			continue;
-		disk = g_raid_md_ddf_get_disk(sc, NULL,
-		    GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[j]));
-		if (disk != NULL)
+	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
+		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
+		if (ddf_meta_find_vdc(&pd->pd_meta, vmeta->vdc->VD_GUID) != NULL)
 			g_raid_md_ddf_start_disk(disk, vol);
 	}
 
 	pv->pv_started = 1;
+	mdi->mdio_starting--;
 	callout_stop(&pv->pv_start_co);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

