svn commit: r230244 - in stable/9: sbin/geom/class/raid sys/geom/raid
Jim Harris
jimharris at FreeBSD.org
Mon Jan 16 23:22:56 UTC 2012
Author: jimharris
Date: Mon Jan 16 23:22:56 2012
New Revision: 230244
URL: http://svn.freebsd.org/changeset/base/230244
Log:
MFC r229886:
Add support for >2TB disks in GEOM RAID for Intel metadata format.
Sponsored by: Intel
Approved by: sbruno
Modified:
stable/9/sbin/geom/class/raid/graid.8
stable/9/sys/geom/raid/md_intel.c
Directory Properties:
stable/9/sbin/geom/ (props changed)
stable/9/sys/ (props changed)
Modified: stable/9/sbin/geom/class/raid/graid.8
==============================================================================
--- stable/9/sbin/geom/class/raid/graid.8 Mon Jan 16 23:22:42 2012 (r230243)
+++ stable/9/sbin/geom/class/raid/graid.8 Mon Jan 16 23:22:56 2012 (r230244)
@@ -251,7 +251,7 @@ complete it there.
Do not run GEOM RAID class on migrating volumes under pain of possible data
corruption!
.Sh 2TiB BARRIERS
-Intel and Promise metadata formats do not support disks above 2TiB.
+Promise metadata format does not support disks above 2TiB.
NVIDIA metadata format does not support volumes above 2TiB.
.Sh EXIT STATUS
Exit status is 0 on success, and non-zero if the command fails.
Modified: stable/9/sys/geom/raid/md_intel.c
==============================================================================
--- stable/9/sys/geom/raid/md_intel.c Mon Jan 16 23:22:42 2012 (r230243)
+++ stable/9/sys/geom/raid/md_intel.c Mon Jan 16 23:22:56 2012 (r230244)
@@ -64,7 +64,10 @@ struct intel_raid_map {
uint8_t total_domains;
uint8_t failed_disk_num;
uint8_t ddf;
- uint32_t filler_2[7];
+ uint32_t offset_hi;
+ uint32_t disk_sectors_hi;
+ uint32_t stripe_count_hi;
+ uint32_t filler_2[4];
uint32_t disk_idx[1]; /* total_disks entries. */
#define INTEL_DI_IDX 0x00ffffff
#define INTEL_DI_RBLD 0x01000000
@@ -111,7 +114,8 @@ struct intel_raid_vol {
uint8_t fs_state;
uint16_t verify_errors;
uint16_t bad_blocks;
- uint32_t filler_1[4];
+ uint32_t curr_migr_unit_hi;
+ uint32_t filler_1[3];
struct intel_raid_map map[1]; /* 2 entries if migr_state != 0. */
} __packed;
@@ -125,8 +129,9 @@ struct intel_raid_disk {
#define INTEL_F_ASSIGNED 0x02
#define INTEL_F_FAILED 0x04
#define INTEL_F_ONLINE 0x08
-
- uint32_t filler[5];
+ uint32_t owner_cfg_num;
+ uint32_t sectors_hi;
+ uint32_t filler[3];
} __packed;
struct intel_raid_conf {
@@ -254,6 +259,82 @@ intel_get_volume(struct intel_raid_conf
return (mvol);
}
+static off_t
+intel_get_map_offset(struct intel_raid_map *mmap)
+{
+ off_t offset = (off_t)mmap->offset_hi << 32;
+
+ offset += mmap->offset;
+ return (offset);
+}
+
+static void
+intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
+{
+
+ mmap->offset = offset & 0xffffffff;
+ mmap->offset_hi = offset >> 32;
+}
+
+static off_t
+intel_get_map_disk_sectors(struct intel_raid_map *mmap)
+{
+ off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
+
+ disk_sectors += mmap->disk_sectors;
+ return (disk_sectors);
+}
+
+static void
+intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
+{
+
+ mmap->disk_sectors = disk_sectors & 0xffffffff;
+ mmap->disk_sectors_hi = disk_sectors >> 32;
+}
+
+static void
+intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
+{
+
+ mmap->stripe_count = stripe_count & 0xffffffff;
+ mmap->stripe_count_hi = stripe_count >> 32;
+}
+
+static off_t
+intel_get_disk_sectors(struct intel_raid_disk *disk)
+{
+ off_t sectors = (off_t)disk->sectors_hi << 32;
+
+ sectors += disk->sectors;
+ return (sectors);
+}
+
+static void
+intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
+{
+
+ disk->sectors = sectors & 0xffffffff;
+ disk->sectors_hi = sectors >> 32;
+}
+
+static off_t
+intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
+{
+ off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
+
+ curr_migr_unit += vol->curr_migr_unit;
+ return (curr_migr_unit);
+}
+
+static void
+intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
+{
+
+ vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
+ vol->curr_migr_unit_hi = curr_migr_unit >> 32;
+}
+
static void
g_raid_md_intel_print(struct intel_raid_conf *meta)
{
@@ -274,10 +355,11 @@ g_raid_md_intel_print(struct intel_raid_
printf("attributes 0x%08x\n", meta->attributes);
printf("total_disks %u\n", meta->total_disks);
printf("total_volumes %u\n", meta->total_volumes);
- printf("DISK# serial disk_sectors disk_id flags\n");
+ printf("DISK# serial disk_sectors disk_sectors_hi disk_id flags\n");
for (i = 0; i < meta->total_disks; i++ ) {
- printf(" %d <%.16s> %u 0x%08x 0x%08x\n", i,
+ printf(" %d <%.16s> %u %u 0x%08x 0x%08x\n", i,
meta->disk[i].serial, meta->disk[i].sectors,
+ meta->disk[i].sectors_hi,
meta->disk[i].id, meta->disk[i].flags);
}
for (i = 0; i < meta->total_volumes; i++) {
@@ -288,6 +370,7 @@ g_raid_md_intel_print(struct intel_raid_
printf(" state %u\n", mvol->state);
printf(" reserved %u\n", mvol->reserved);
printf(" curr_migr_unit %u\n", mvol->curr_migr_unit);
+ printf(" curr_migr_unit_hi %u\n", mvol->curr_migr_unit_hi);
printf(" checkpoint_id %u\n", mvol->checkpoint_id);
printf(" migr_state %u\n", mvol->migr_state);
printf(" migr_type %u\n", mvol->migr_type);
@@ -297,8 +380,11 @@ g_raid_md_intel_print(struct intel_raid_
printf(" *** Map %d ***\n", j);
mmap = intel_get_map(mvol, j);
printf(" offset %u\n", mmap->offset);
+ printf(" offset_hi %u\n", mmap->offset_hi);
printf(" disk_sectors %u\n", mmap->disk_sectors);
+ printf(" disk_sectors_hi %u\n", mmap->disk_sectors_hi);
printf(" stripe_count %u\n", mmap->stripe_count);
+ printf(" stripe_count_hi %u\n", mmap->stripe_count_hi);
printf(" strip_sectors %u\n", mmap->strip_sectors);
printf(" status %u\n", mmap->status);
printf(" type %u\n", mmap->type);
@@ -660,12 +746,15 @@ g_raid_md_intel_start_disk(struct g_raid
continue;
/* Make sure this disk is big enough. */
TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
+ off_t disk_sectors =
+ intel_get_disk_sectors(&pd->pd_disk_meta);
+
if (sd->sd_offset + sd->sd_size + 4096 >
- (off_t)pd->pd_disk_meta.sectors * 512) {
+ disk_sectors * 512) {
G_RAID_DEBUG1(1, sc,
"Disk too small (%llu < %llu)",
- ((unsigned long long)
- pd->pd_disk_meta.sectors) * 512,
+ (unsigned long long)
+ disk_sectors * 512,
(unsigned long long)
sd->sd_offset + sd->sd_size + 4096);
break;
@@ -788,7 +877,7 @@ nofit:
sd->sd_rebuild_pos = 0;
} else {
sd->sd_rebuild_pos =
- (off_t)mvol->curr_migr_unit *
+ intel_get_vol_curr_migr_unit(mvol) *
sd->sd_volume->v_strip_size *
mmap0->total_domains;
}
@@ -815,7 +904,7 @@ nofit:
sd->sd_rebuild_pos = 0;
} else {
sd->sd_rebuild_pos =
- (off_t)mvol->curr_migr_unit *
+ intel_get_vol_curr_migr_unit(mvol) *
sd->sd_volume->v_strip_size *
mmap0->total_domains;
}
@@ -967,8 +1056,8 @@ g_raid_md_intel_start(struct g_raid_soft
vol->v_sectorsize = 512; //ZZZ
for (j = 0; j < vol->v_disks_count; j++) {
sd = &vol->v_subdisks[j];
- sd->sd_offset = (off_t)mmap->offset * 512; //ZZZ
- sd->sd_size = (off_t)mmap->disk_sectors * 512; //ZZZ
+ sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
+ sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
}
g_raid_start_volume(vol);
}
@@ -1176,9 +1265,6 @@ g_raid_md_taste_intel(struct g_raid_md_o
G_RAID_DEBUG(1,
"Intel vendor mismatch 0x%04x != 0x8086",
vendor);
- } else if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
- G_RAID_DEBUG(1,
- "Intel disk '%s' is too big.", pp->name);
} else {
G_RAID_DEBUG(1,
"No Intel metadata, forcing spare.");
@@ -1195,10 +1281,10 @@ g_raid_md_taste_intel(struct g_raid_md_o
G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
goto fail1;
}
- if (meta->disk[disk_pos].sectors !=
+ if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
(pp->mediasize / pp->sectorsize)) {
G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
- (off_t)meta->disk[disk_pos].sectors,
+ intel_get_disk_sectors(&meta->disk[disk_pos]),
(off_t)(pp->mediasize / pp->sectorsize));
goto fail1;
}
@@ -1266,7 +1352,8 @@ search:
pd->pd_disk_pos = -1;
if (spare == 2) {
memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
- pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
+ intel_set_disk_sectors(&pd->pd_disk_meta,
+ pp->mediasize / pp->sectorsize);
pd->pd_disk_meta.id = 0;
pd->pd_disk_meta.flags = INTEL_F_SPARE;
} else {
@@ -1372,7 +1459,7 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
const char *verb, *volname, *levelname, *diskname;
char *tmp;
int *nargs, *force;
- off_t off, size, sectorsize, strip;
+ off_t off, size, sectorsize, strip, disk_sectors;
intmax_t *sizearg, *striparg;
int numdisks, i, len, level, qual, update;
int error;
@@ -1452,13 +1539,6 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
cp->private = disk;
g_topology_unlock();
- if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
- gctl_error(req,
- "Disk '%s' is too big.", diskname);
- error = -8;
- break;
- }
-
error = g_raid_md_get_label(cp,
&pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
if (error != 0) {
@@ -1479,7 +1559,8 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
"Dumping not supported by %s.",
cp->provider->name);
- pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
+ intel_set_disk_sectors(&pd->pd_disk_meta,
+ pp->mediasize / pp->sectorsize);
if (size > pp->mediasize)
size = pp->mediasize;
if (sectorsize < pp->sectorsize)
@@ -1544,10 +1625,6 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
gctl_error(req, "Size too small.");
return (-13);
}
- if (size > 0xffffffffllu * sectorsize) {
- gctl_error(req, "Size too big.");
- return (-14);
- }
/* We have all we need, create things: volume, ... */
mdi->mdio_started = 1;
@@ -1655,8 +1732,11 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
disk = vol1->v_subdisks[i].sd_disk;
pd = (struct g_raid_md_intel_perdisk *)
disk->d_md_data;
- if ((off_t)pd->pd_disk_meta.sectors * 512 < size)
- size = (off_t)pd->pd_disk_meta.sectors * 512;
+ disk_sectors =
+ intel_get_disk_sectors(&pd->pd_disk_meta);
+
+ if (disk_sectors * 512 < size)
+ size = disk_sectors * 512;
if (disk->d_consumer != NULL &&
disk->d_consumer->provider != NULL &&
disk->d_consumer->provider->sectorsize >
@@ -1950,14 +2030,6 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
pp = cp->provider;
g_topology_unlock();
- if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
- gctl_error(req,
- "Disk '%s' is too big.", diskname);
- g_raid_kill_consumer(sc, cp);
- error = -8;
- break;
- }
-
/* Read disk serial. */
error = g_raid_md_get_label(cp,
&serial[0], INTEL_SERIAL_LEN);
@@ -1990,7 +2062,8 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
INTEL_SERIAL_LEN);
- pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
+ intel_set_disk_sectors(&pd->pd_disk_meta,
+ pp->mediasize / pp->sectorsize);
pd->pd_disk_meta.id = 0;
pd->pd_disk_meta.flags = INTEL_F_SPARE;
@@ -2165,8 +2238,8 @@ g_raid_md_write_intel(struct g_raid_md_o
mmap0 = intel_get_map(mvol, 0);
/* Write map / common part of two maps. */
- mmap0->offset = sd->sd_offset / sectorsize;
- mmap0->disk_sectors = sd->sd_size / sectorsize;
+ intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
+ intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
mmap0->strip_sectors = vol->v_strip_size / sectorsize;
if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
mmap0->status = INTEL_S_FAILURE;
@@ -2188,15 +2261,15 @@ g_raid_md_write_intel(struct g_raid_md_o
mmap0->total_domains = 2;
else
mmap0->total_domains = 1;
- mmap0->stripe_count = sd->sd_size / vol->v_strip_size /
- mmap0->total_domains;
+ intel_set_map_stripe_count(mmap0,
+ sd->sd_size / vol->v_strip_size / mmap0->total_domains);
mmap0->failed_disk_num = 0xff;
mmap0->ddf = 1;
/* If there are two maps - copy common and update. */
if (mvol->migr_state) {
- mvol->curr_migr_unit = pos /
- vol->v_strip_size / mmap0->total_domains;
+ intel_set_vol_curr_migr_unit(mvol,
+ pos / vol->v_strip_size / mmap0->total_domains);
mmap1 = intel_get_map(mvol, 1);
memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
mmap0->status = INTEL_S_READY;
More information about the svn-src-stable-9 mailing list