ZFS patches
Chuck Silvers
chs at highcloudsecurity.com
Thu May 31 00:32:36 UTC 2012
We only have a few changes to ZFS itself, and now that I look, I see that
you've found one of them independently (r230256).
The other ones are:
- Improve performance of booting from a ZFS root under ESXi.
Previously this would sit there for about 5 minutes before even
starting to load the kernel. The problem is that the ZFS pool-discovery
code opens every possible GPT partition looking for pools, and it rereads
the GPT each time, one sector at a time. We changed the GPT code to
read the whole GPT in one shot, which reduced the delay to almost nothing.
I remember seeing some discussion about a PR on this topic some time back,
but I don't know if any fix was ever applied, and I don't see the PR now.
As I recall, the proposal in that discussion was to improve the boot code's
caching so that it wouldn't reread the GPT at all, which I imagine would
work just as well as what we did.
(Hmm, this isn't actually a change to ZFS either.)
- Make zfs_resilver_delay and zfs_resilver_min_time_ms tunable via sysctl.
Patches for both of these are attached.
-Chuck
-------------- next part --------------
Index: sys/boot/i386/libi386/biosdisk.c
===================================================================
RCS file: /home/chs/freebsd/cvs/src/sys/boot/i386/libi386/biosdisk.c,v
retrieving revision 1.62.2.2.4.1
diff -u -p -r1.62.2.2.4.1 biosdisk.c
--- sys/boot/i386/libi386/biosdisk.c 21 Dec 2010 17:09:25 -0000 1.62.2.2.4.1
+++ sys/boot/i386/libi386/biosdisk.c 6 Jul 2011 20:44:31 -0000
@@ -853,9 +853,10 @@ bd_open_gpt(struct open_disk *od, struct
struct gpt_hdr *hdr;
struct gpt_ent *ent;
struct gpt_part *gp;
- int entries_per_sec, error, i, part;
+ int entries_per_sec, error, i, part, nsec;
daddr_t lba, elba;
char gpt[BIOSDISK_SECSIZE], tbl[BIOSDISK_SECSIZE];
+ char *buf, *tblp;
/*
* Following calculations attempt to determine the correct value
@@ -900,22 +901,31 @@ bd_open_gpt(struct open_disk *od, struct
return (EINVAL);
}
+ entries_per_sec = BIOSDISK_SECSIZE / hdr->hdr_entsz;
+ nsec = hdr->hdr_entries / entries_per_sec;
+ if (nsec > 128) {
+ DEBUG("too many GPT table sectors %d", nsec);
+ return (EINVAL);
+ }
+ buf = alloca(nsec * BIOSDISK_SECSIZE);
+ if (bd_read(od, hdr->hdr_lba_table, nsec, buf)) {
+ DEBUG("error reading GPT table");
+ return (EIO);
+ }
+
/* Now walk the partition table to count the number of valid partitions. */
part = 0;
- entries_per_sec = BIOSDISK_SECSIZE / hdr->hdr_entsz;
+ tblp = buf;
elba = hdr->hdr_lba_table + hdr->hdr_entries / entries_per_sec;
for (lba = hdr->hdr_lba_table; lba < elba; lba++) {
- if (bd_read(od, lba, 1, tbl)) {
- DEBUG("error reading GPT table");
- return (EIO);
- }
for (i = 0; i < entries_per_sec; i++) {
- ent = (struct gpt_ent *)(tbl + i * hdr->hdr_entsz);
+ ent = (struct gpt_ent *)(tblp + i * hdr->hdr_entsz);
if (uuid_is_nil(&ent->ent_type, NULL) || ent->ent_lba_start == 0 ||
ent->ent_lba_end < ent->ent_lba_start)
continue;
part++;
}
+ tblp += BIOSDISK_SECSIZE;
}
/* Save the important information about all the valid partitions. */
@@ -923,14 +933,10 @@ bd_open_gpt(struct open_disk *od, struct
if (part != 0) {
od->od_partitions = malloc(part * sizeof(struct gpt_part));
part = 0;
+ tblp = buf;
for (lba = hdr->hdr_lba_table; lba < elba; lba++) {
- if (bd_read(od, lba, 1, tbl)) {
- DEBUG("error reading GPT table");
- error = EIO;
- goto out;
- }
for (i = 0; i < entries_per_sec; i++) {
- ent = (struct gpt_ent *)(tbl + i * hdr->hdr_entsz);
+ ent = (struct gpt_ent *)(tblp + i * hdr->hdr_entsz);
if (uuid_is_nil(&ent->ent_type, NULL) ||
ent->ent_lba_start == 0 ||
ent->ent_lba_end < ent->ent_lba_start)
@@ -942,6 +948,7 @@ bd_open_gpt(struct open_disk *od, struct
od->od_partitions[part].gp_end = ent->ent_lba_end;
part++;
}
+ tblp += BIOSDISK_SECSIZE;
}
}
od->od_flags |= BD_GPTOK;
-------------- next part --------------
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c.ORG 2011-10-12 14:27:36.000000000 +0530
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c 2011-10-12 14:38:44.000000000 +0530
@@ -69,6 +69,14 @@
enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
int dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */
+SYSCTL_DECL(_vfs_zfs);
+TUNABLE_INT("vfs.zfs.zfs_resilver_delay", &zfs_resilver_delay);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, zfs_resilver_delay, CTLFLAG_RW,
+ &zfs_resilver_delay, 0, "Resilver delay");
+TUNABLE_INT("vfs.zfs.zfs_resilver_min_time_ms", &zfs_resilver_min_time_ms);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, zfs_resilver_min_time_ms, CTLFLAG_RW,
+ &zfs_resilver_min_time_ms, 0, "Resilver min time");
+
#define DSL_SCAN_IS_SCRUB_RESILVER(scn) \
((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \
(scn)->scn_phys.scn_func == POOL_SCAN_RESILVER)
More information about the zfs-devel
mailing list