svn commit: r202991 - in projects/suj: 6/sbin/fsck_ffs
6/sys/ufs/ffs 7/sbin/fsck_ffs 7/sbin/fsdb 7/sys/ufs/ffs
8/sbin/fsck_ffs 8/sbin/fsdb 8/sys/ufs/ffs
Jeff Roberson
jeff at FreeBSD.org
Mon Jan 25 23:30:53 UTC 2010
Author: jeff
Date: Mon Jan 25 23:30:53 2010
New Revision: 202991
URL: http://svn.freebsd.org/changeset/base/202991
Log:
- Merge r202989 and r202990 from suj/head
Modified:
projects/suj/6/sbin/fsck_ffs/fsck.h
projects/suj/6/sbin/fsck_ffs/main.c
projects/suj/6/sbin/fsck_ffs/suj.c
projects/suj/6/sys/ufs/ffs/ffs_inode.c
projects/suj/6/sys/ufs/ffs/ffs_softdep.c
projects/suj/6/sys/ufs/ffs/ffs_vfsops.c
projects/suj/6/sys/ufs/ffs/fs.h
projects/suj/7/sbin/fsck_ffs/fsck.h
projects/suj/7/sbin/fsck_ffs/main.c
projects/suj/7/sbin/fsck_ffs/suj.c
projects/suj/7/sbin/fsdb/fsdb.c
projects/suj/7/sys/ufs/ffs/ffs_inode.c
projects/suj/7/sys/ufs/ffs/ffs_softdep.c
projects/suj/7/sys/ufs/ffs/ffs_vfsops.c
projects/suj/7/sys/ufs/ffs/fs.h
projects/suj/8/sbin/fsck_ffs/fsck.h
projects/suj/8/sbin/fsck_ffs/main.c
projects/suj/8/sbin/fsck_ffs/suj.c
projects/suj/8/sbin/fsdb/fsdb.c
projects/suj/8/sys/ufs/ffs/ffs_inode.c
projects/suj/8/sys/ufs/ffs/ffs_softdep.c
projects/suj/8/sys/ufs/ffs/ffs_vfsops.c
projects/suj/8/sys/ufs/ffs/fs.h
Modified: projects/suj/6/sbin/fsck_ffs/fsck.h
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/fsck.h Mon Jan 25 23:27:21 2010 (r202990)
+++ projects/suj/6/sbin/fsck_ffs/fsck.h Mon Jan 25 23:30:53 2010 (r202991)
@@ -385,4 +385,4 @@ void rwerror(const char *mesg, ufs2_dad
void sblock_init(void);
void setinodebuf(ino_t);
int setup(char *dev);
-void suj_check(const char *filesys);
+int suj_check(const char *filesys);
Modified: projects/suj/6/sbin/fsck_ffs/main.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/main.c Mon Jan 25 23:27:21 2010 (r202990)
+++ projects/suj/6/sbin/fsck_ffs/main.c Mon Jan 25 23:30:53 2010 (r202991)
@@ -229,8 +229,9 @@ checkfilesys(char *filesys)
if ((fsreadfd = open(filesys, O_RDONLY)) < 0 || readsb(0) == 0)
exit(3); /* Cannot read superblock */
close(fsreadfd);
- if (sblock.fs_flags & FS_NEEDSFSCK)
- exit(4); /* Earlier background failed */
+ /* Earlier background failed or journaled */
+ if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ))
+ exit(4);
if ((sblock.fs_flags & FS_DOSOFTDEP) == 0)
exit(5); /* Not running soft updates */
size = MIBSIZE;
@@ -360,6 +361,23 @@ checkfilesys(char *filesys)
sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize);
return (0);
}
+ /*
+ * Determine if we can and should do journal recovery.
+ */
+ if ((sblock.fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == FS_SUJ) {
+ if (preen || reply("USE JOURNAL?")) {
+ if (suj_check(filesys) == 0)
+ goto out;
+ /* suj_check failed, fall through. */
+ }
+ printf("** Skipping journal, falling through to full fsck\n");
+ /*
+ * Write the superblock so we don't try to recover the
+ * journal on another pass.
+ */
+ sblock.fs_mtime = time(NULL);
+ sbdirty();
+ }
/*
* Cleared if any questions answered no. Used to decide if
@@ -454,7 +472,6 @@ checkfilesys(char *filesys)
inocleanup();
if (fsmodified) {
sblock.fs_time = time(NULL);
- sblock.fs_mtime = time(NULL);
sbdirty();
}
if (cvtlevel && sblk.b_dirty) {
@@ -485,6 +502,7 @@ checkfilesys(char *filesys)
printf("\n***** FILE SYSTEM WAS MODIFIED *****\n");
if (rerun)
printf("\n***** PLEASE RERUN FSCK *****\n");
+out:
if (mntp != NULL) {
/*
* We modified a mounted file system. Do a mount update on
Modified: projects/suj/6/sbin/fsck_ffs/suj.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/suj.c Mon Jan 25 23:27:21 2010 (r202990)
+++ projects/suj/6/sbin/fsck_ffs/suj.c Mon Jan 25 23:30:53 2010 (r202991)
@@ -49,7 +49,8 @@ __FBSDID("$FreeBSD$");
static void ino_decr(ino_t);
-#define SUJ_HASHSIZE 128
+#define DOTDOT_OFFSET DIRECTSIZ(1)
+#define SUJ_HASHSIZE 2048
#define SUJ_HASHMASK (SUJ_HASHSIZE - 1)
#define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK)
@@ -68,7 +69,9 @@ TAILQ_HEAD(srechd, suj_rec);
struct suj_ino {
LIST_ENTRY(suj_ino) si_next;
struct srechd si_recs;
+ struct srechd si_newrecs;
struct srechd si_movs;
+ struct jtrncrec *si_trunc;
ino_t si_ino;
int si_nlinkadj;
int si_skipparent;
@@ -90,6 +93,7 @@ struct data_blk {
uint8_t *db_buf;
ufs2_daddr_t db_blk;
int db_size;
+ int db_dirty;
};
struct ino_blk {
@@ -106,6 +110,8 @@ struct suj_cg {
struct inohd sc_inohash[SUJ_HASHSIZE];
struct iblkhd sc_iblkhash[SUJ_HASHSIZE];
struct ino_blk *sc_lastiblk;
+ struct suj_ino *sc_lastino;
+ struct suj_blk *sc_lastblk;
uint8_t *sc_cgbuf;
struct cg *sc_cgp;
int sc_dirty;
@@ -114,6 +120,8 @@ struct suj_cg {
LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE];
LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE];
+struct suj_cg *lastcg;
+struct data_blk *lastblk;
TAILQ_HEAD(seghd, suj_seg) allsegs;
uint64_t oldseq;
@@ -131,6 +139,8 @@ uint64_t jbytes;
uint64_t jrecs;
typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int);
+static void ino_trunc(ino_t ino, off_t size);
+static void ino_build(struct suj_ino *sino);
static void *
errmalloc(size_t n)
@@ -159,12 +169,6 @@ opendisk(const char *devnam)
disk->d_error);
}
fs = &disk->d_fs;
- /*
- * Setup a few things so reply() can work.
- */
- bcopy(fs, &sblock, sizeof(sblock));
- fsreadfd = disk->d_fd;
- fswritefd = disk->d_fd;
}
/*
@@ -198,8 +202,6 @@ closedisk(const char *devnam)
free(disk);
disk = NULL;
fs = NULL;
- fsreadfd = -1;
- fswritefd = -1;
}
/*
@@ -216,10 +218,14 @@ cg_lookup(int cgx)
abort();
errx(1, "Bad cg number %d", cgx);
}
+ if (lastcg && lastcg->sc_cgx == cgx)
+ return (lastcg);
hd = &cghash[SUJ_HASH(cgx)];
LIST_FOREACH(sc, hd, sc_next)
- if (sc->sc_cgx == cgx)
+ if (sc->sc_cgx == cgx) {
+ lastcg = sc;
return (sc);
+ }
sc = errmalloc(sizeof(*sc));
bzero(sc, sizeof(*sc));
sc->sc_cgbuf = errmalloc(fs->fs_bsize);
@@ -245,6 +251,8 @@ ino_lookup(ino_t ino, int creat)
struct suj_cg *sc;
sc = cg_lookup(ino_to_cg(fs, ino));
+ if (sc->sc_lastino && sc->sc_lastino->si_ino == ino)
+ return (sc->sc_lastino);
hd = &sc->sc_inohash[SUJ_HASH(ino)];
LIST_FOREACH(sino, hd, si_next)
if (sino->si_ino == ino)
@@ -256,6 +264,7 @@ ino_lookup(ino_t ino, int creat)
sino->si_ino = ino;
sino->si_nlinkadj = 0;
TAILQ_INIT(&sino->si_recs);
+ TAILQ_INIT(&sino->si_newrecs);
TAILQ_INIT(&sino->si_movs);
LIST_INSERT_HEAD(hd, sino, si_next);
@@ -274,7 +283,9 @@ blk_lookup(ufs2_daddr_t blk, int creat)
struct blkhd *hd;
sc = cg_lookup(dtog(fs, blk));
- hd = &sc->sc_blkhash[SUJ_HASH(blk)];
+ if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk)
+ return (sc->sc_lastblk);
+ hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))];
LIST_FOREACH(sblk, hd, sb_next)
if (sblk->sb_blk == blk)
return (sblk);
@@ -289,16 +300,18 @@ blk_lookup(ufs2_daddr_t blk, int creat)
return (sblk);
}
-static uint8_t *
-dblk_read(ufs2_daddr_t blk, int size)
+static struct data_blk *
+dblk_lookup(ufs2_daddr_t blk)
{
struct data_blk *dblk;
struct dblkhd *hd;
- hd = &dbhash[SUJ_HASH(blk)];
+ hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))];
+ if (lastblk && lastblk->db_blk == blk)
+ return (lastblk);
LIST_FOREACH(dblk, hd, db_next)
if (dblk->db_blk == blk)
- goto found;
+ return (dblk);
/*
* The inode block wasn't located, allocate a new one.
*/
@@ -306,7 +319,15 @@ dblk_read(ufs2_daddr_t blk, int size)
bzero(dblk, sizeof(*dblk));
LIST_INSERT_HEAD(hd, dblk, db_next);
dblk->db_blk = blk;
-found:
+ return (dblk);
+}
+
+static uint8_t *
+dblk_read(ufs2_daddr_t blk, int size)
+{
+ struct data_blk *dblk;
+
+ dblk = dblk_lookup(blk);
/*
* I doubt size mismatches can happen in practice but it is trivial
* to handle.
@@ -322,6 +343,33 @@ found:
return (dblk->db_buf);
}
+static void
+dblk_dirty(ufs2_daddr_t blk)
+{
+ struct data_blk *dblk;
+
+ dblk = dblk_lookup(blk);
+ dblk->db_dirty = 1;
+}
+
+static void
+dblk_write(void)
+{
+ struct data_blk *dblk;
+ int i;
+
+ for (i = 0; i < SUJ_HASHSIZE; i++) {
+ LIST_FOREACH(dblk, &dbhash[i], db_next) {
+ if (dblk->db_dirty == 0 || dblk->db_size == 0)
+ continue;
+ if (bwrite(disk, fsbtodb(fs, dblk->db_blk),
+ dblk->db_buf, dblk->db_size) == -1)
+ err(1, "Unable to write block %jd",
+ dblk->db_blk);
+ }
+ }
+}
+
static union dinode *
ino_read(ino_t ino)
{
@@ -333,7 +381,10 @@ ino_read(ino_t ino)
blk = ino_to_fsba(fs, ino);
sc = cg_lookup(ino_to_cg(fs, ino));
- hd = &sc->sc_iblkhash[SUJ_HASH(blk)];
+ iblk = sc->sc_lastiblk;
+ if (iblk && iblk->ib_blk == blk)
+ goto found;
+ hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
LIST_FOREACH(iblk, hd, ib_next)
if (iblk->ib_blk == blk)
goto found;
@@ -371,7 +422,7 @@ ino_dirty(ino_t ino)
iblk->ib_dirty = 1;
return;
}
- hd = &sc->sc_iblkhash[SUJ_HASH(blk)];
+ hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
LIST_FOREACH(iblk, hd, ib_next) {
if (iblk->ib_blk == blk) {
iblk->ib_dirty = 1;
@@ -612,22 +663,22 @@ blk_free(ufs2_daddr_t bno, int mask, int
* to fetch a specific block.
*/
static ufs2_daddr_t
-indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, int level)
+indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn)
{
ufs2_daddr_t *bap2;
ufs2_daddr_t *bap1;
ufs_lbn_t lbnadd;
ufs_lbn_t base;
+ int level;
int i;
if (blk == 0)
return (0);
- if (cur == lbn)
- return (blk);
- if (level == 0 && lbn < 0) {
- abort();
+ level = lbn_level(cur);
+ if (level == -1)
+ errx(1, "Invalid indir lbn %jd", lbn);
+ if (level == 0 && lbn < 0)
errx(1, "Invalid lbn %jd", lbn);
- }
bap2 = (void *)dblk_read(blk, fs->fs_bsize);
bap1 = (void *)bap2;
lbnadd = 1;
@@ -638,11 +689,9 @@ indir_blkatoff(ufs2_daddr_t blk, ino_t i
i = (lbn - base) / lbnadd;
else
i = (-lbn - base) / lbnadd;
- if (i < 0 || i >= NINDIR(fs)) {
- abort();
+ if (i < 0 || i >= NINDIR(fs))
errx(1, "Invalid indirect index %d produced by lbn %jd",
i, lbn);
- }
if (level == 0)
cur = base + (i * lbnadd);
else
@@ -657,7 +706,7 @@ indir_blkatoff(ufs2_daddr_t blk, ino_t i
abort();
errx(1, "Invalid lbn %jd at level 0", lbn);
}
- return indir_blkatoff(blk, ino, cur, lbn, level - 1);
+ return indir_blkatoff(blk, ino, cur, lbn);
}
/*
@@ -685,14 +734,10 @@ ino_blkatoff(union dinode *ip, ino_t ino
return (ip->dp2.di_extb[lbn]);
}
/*
- * And now direct and indirect. Verify that the lbn does not
- * exceed the size required to store the file by asking for
- * the lbn of the last byte. These blocks should be 0 anyway
- * so this simply saves the traversal.
+ * Now direct and indirect.
*/
- if (lbn > 0 && lbn > lblkno(fs, DIP(ip, di_size) - 1))
- return (0);
- if (lbn < 0 && -lbn > lblkno(fs, DIP(ip, di_size) - 1))
+ if (DIP(ip, di_mode) == IFLNK &&
+ DIP(ip, di_size) < fs->fs_maxsymlinklen)
return (0);
if (lbn >= 0 && lbn < NDADDR) {
*frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn));
@@ -703,7 +748,7 @@ ino_blkatoff(union dinode *ip, ino_t ino
for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++,
tmpval *= NINDIR(fs), cur = next) {
next = cur + tmpval;
- if (lbn == -cur)
+ if (lbn == -cur - i)
return (DIP(ip, di_ib[i]));
/*
* Determine whether the lbn in question is within this tree.
@@ -712,8 +757,7 @@ ino_blkatoff(union dinode *ip, ino_t ino
continue;
if (lbn > 0 && lbn >= next)
continue;
-
- return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn, i);
+ return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn);
}
errx(1, "lbn %jd not in ino", lbn);
}
@@ -760,7 +804,10 @@ ino_isat(ino_t parent, off_t diroff, ino
*mode = DIP(dip, di_mode);
if ((*mode & IFMT) != IFDIR) {
if (debug) {
- /* This can happen if the parent inode was reallocated. */
+ /*
+ * This can happen if the parent inode
+ * was reallocated.
+ */
if (*mode != 0)
printf("Directory %d has bad mode %o\n",
parent, *mode);
@@ -791,7 +838,7 @@ ino_isat(ino_t parent, off_t diroff, ino
* certain we hit a valid record and not some junk in the middle
* of a file name. Stop when we reach or pass the expected offset.
*/
- dpoff = 0;
+ dpoff = (doff / DIRBLKSIZ) * DIRBLKSIZ;
do {
dp = (struct direct *)&block[dpoff];
if (dpoff == doff)
@@ -801,7 +848,7 @@ ino_isat(ino_t parent, off_t diroff, ino
dpoff += dp->d_reclen;
} while (dpoff <= doff);
if (dpoff > fs->fs_bsize)
- errx(1, "Corrupt directory block in dir inode %d", parent);
+ errx(1, "Corrupt directory block in dir ino %d", parent);
/* Not found. */
if (dpoff != doff) {
if (debug)
@@ -830,6 +877,7 @@ ino_isat(ino_t parent, off_t diroff, ino
#define VISIT_INDIR 0x0001
#define VISIT_EXT 0x0002
+#define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. */
/*
* Read an indirect level which may or may not be linked into an inode.
@@ -854,16 +902,14 @@ indir_visit(ino_t ino, ufs_lbn_t lbn, uf
*/
if (blk == 0)
return;
- if (blk_isindir(blk, ino, lbn) == 0) {
- if (debug)
- printf("blk %jd ino %d lbn %jd is not indir.\n",
- blk, ino, lbn);
- goto out;
- }
level = lbn_level(lbn);
- if (level == -1) {
- abort();
+ if (level == -1)
errx(1, "Invalid level for lbn %jd", lbn);
+ if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) {
+ if (debug)
+ printf("blk %jd ino %d lbn %jd(%d) is not indir.\n",
+ blk, ino, lbn, level);
+ goto out;
}
lbnadd = 1;
for (i = level; i > 0; i--)
@@ -903,6 +949,7 @@ out:
static uint64_t
ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags)
{
+ ufs_lbn_t nextlbn;
ufs_lbn_t tmpval;
ufs_lbn_t lbn;
uint64_t size;
@@ -937,8 +984,15 @@ ino_visit(union dinode *ip, ino_t ino, i
fragcnt += frags;
visitor(ino, i, DIP(ip, di_db[i]), frags);
}
+ /*
+ * We know the following indirects are real as we're following
+ * real pointers to them.
+ */
+ flags |= VISIT_ROOT;
for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++,
- tmpval *= NINDIR(fs), lbn += tmpval) {
+ lbn = nextlbn) {
+ nextlbn = lbn + tmpval;
+ tmpval *= NINDIR(fs);
if (DIP(ip, di_ib[i]) == 0)
continue;
indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor,
@@ -948,11 +1002,15 @@ ino_visit(union dinode *ip, ino_t ino, i
}
/*
- * Null visitor function used when we just want to count blocks.
+ * Null visitor function used when we just want to count blocks and
+ * record the lbn.
*/
+ufs_lbn_t visitlbn;
static void
null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{
+ if (lbn > 0)
+ visitlbn = lbn;
}
/*
@@ -962,23 +1020,45 @@ null_visit(ino_t ino, ufs_lbn_t lbn, ufs
* reachable at the time the inode was written.
*/
static void
-ino_adjblks(ino_t ino)
+ino_adjblks(struct suj_ino *sino)
{
- struct suj_ino *sino;
union dinode *ip;
uint64_t blocks;
uint64_t frags;
+ off_t isize;
+ off_t size;
+ ino_t ino;
- sino = ino_lookup(ino, 1);
- if (sino->si_blkadj)
- return;
- sino->si_blkadj = 1;
+ ino = sino->si_ino;
ip = ino_read(ino);
/* No need to adjust zero'd inodes. */
if (DIP(ip, di_mode) == 0)
return;
+ /*
+ * Visit all blocks and count them as well as recording the last
+ * valid lbn in the file. If the file size doesn't agree with the
+ * last lbn we need to truncate to fix it. Otherwise just adjust
+ * the blocks count.
+ */
+ visitlbn = 0;
frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
blocks = fsbtodb(fs, frags);
+ /*
+ * We assume the size and direct block list is kept coherent by
+ * softdep. For files that have extended into indirects we truncate
+ * to the size in the inode or the maximum size permitted by
+ * populated indirects.
+ */
+ if (visitlbn >= NDADDR) {
+ isize = DIP(ip, di_size);
+ size = lblktosize(fs, visitlbn + 1);
+ printf("ino %d isize %jd size %jd\n", ino, isize, size);
+ if (isize > size)
+ isize = size;
+ /* Always truncate to free any unpopulated indirects. */
+ ino_trunc(sino->si_ino, isize);
+ return;
+ }
if (blocks == DIP(ip, di_blocks))
return;
if (debug)
@@ -1021,6 +1101,16 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino
}
static void
+ino_setskip(struct suj_ino *sino, ino_t parent)
+{
+ int isdot;
+ int mode;
+
+ if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot))
+ sino->si_skipparent = 1;
+}
+
+static void
ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{
struct suj_ino *sino;
@@ -1053,7 +1143,7 @@ ino_free_children(ino_t ino, ufs_lbn_t l
if (isparent && skipparent == 1)
continue;
if (debug)
- printf("Directory %d removing inode %d name %s\n",
+ printf("Directory %d removing ino %d name %s\n",
ino, dp->d_ino, dp->d_name);
/*
* Lookup this inode to see if we have a record for it.
@@ -1070,7 +1160,7 @@ ino_free_children(ino_t ino, ufs_lbn_t l
* parent. Don't try to adjust our link down again.
*/
if (isparent == 0)
- sino->si_skipparent = 1;
+ ino_setskip(sino, ino);
/*
* If we haven't yet processed this inode we need to make
* sure we will successfully discover the lost path. If not
@@ -1084,16 +1174,16 @@ ino_free_children(ino_t ino, ufs_lbn_t l
break;
}
if (srec == NULL)
- sino->si_nlinkadj--;
+ sino->si_nlinkadj++;
}
}
/*
- * Truncate an inode, freeing all blocks and decrementing all children's
+ * Reclaim an inode, freeing all blocks and decrementing all children's
* link counts. Free the inode back to the cg.
*/
static void
-ino_truncate(union dinode *ip, ino_t ino, int mode)
+ino_reclaim(union dinode *ip, ino_t ino, int mode)
{
uint32_t gen;
@@ -1147,7 +1237,7 @@ ino_decr(ino_t ino)
if (debug)
printf("ino %d not enough links to live %d < %d\n",
ino, nlink, reqlink);
- ino_truncate(ip, ino, mode);
+ ino_reclaim(ip, ino, mode);
return;
}
DIP_SET(ip, di_nlink, nlink);
@@ -1192,7 +1282,7 @@ ino_adjust(ino_t ino, int lastmode, nlin
if (debug)
printf("ino %d not enough links to live %d < %d\n",
ino, nlink, reqlink);
- ino_truncate(ip, ino, mode);
+ ino_reclaim(ip, ino, mode);
return;
}
/* If required write the updated link count. */
@@ -1205,13 +1295,194 @@ ino_adjust(ino_t ino, int lastmode, nlin
ino_dirty(ino);
}
-#define DOTDOT_OFFSET DIRECTSIZ(1)
+/*
+ * Truncate some or all blocks in an indirect, freeing any that are required
+ * and zeroing the indirect.
+ */
+static void
+indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn)
+{
+ ufs2_daddr_t *bap2;
+ ufs1_daddr_t *bap1;
+ ufs_lbn_t lbnadd;
+ ufs2_daddr_t nblk;
+ ufs_lbn_t next;
+ ufs_lbn_t nlbn;
+ int dirty;
+ int level;
+ int i;
+
+ if (blk == 0)
+ return;
+ dirty = 0;
+ level = lbn_level(lbn);
+ if (level == -1)
+ errx(1, "Invalid level for lbn %jd", lbn);
+ lbnadd = 1;
+ for (i = level; i > 0; i--)
+ lbnadd *= NINDIR(fs);
+ bap1 = (void *)dblk_read(blk, fs->fs_bsize);
+ bap2 = (void *)bap1;
+ for (i = 0; i < NINDIR(fs); i++) {
+ if (fs->fs_magic == FS_UFS1_MAGIC)
+ nblk = *bap1++;
+ else
+ nblk = *bap2++;
+ if (nblk == 0)
+ continue;
+ if (level != 0) {
+ nlbn = (lbn + 1) - (i * lbnadd);
+ /*
+ * Calculate the lbn of the next indirect to
+ * determine if any of this indirect must be
+ * reclaimed.
+ */
+ next = -(lbn + level) + ((i+1) * lbnadd);
+ if (next <= lastlbn)
+ continue;
+ indir_trunc(ino, nlbn, nblk, lastlbn);
+ /* If all of this indirect was reclaimed, free it. */
+ nlbn = next - lbnadd;
+ if (nlbn < lastlbn)
+ continue;
+ } else {
+ nlbn = -lbn + i * lbnadd;
+ if (nlbn < lastlbn)
+ continue;
+ }
+ dirty = 1;
+ blk_free(nblk, 0, fs->fs_frag);
+ if (fs->fs_magic == FS_UFS1_MAGIC)
+ *(bap1 - 1) = 0;
+ else
+ *(bap2 - 1) = 0;
+ }
+ if (dirty)
+ dblk_dirty(blk);
+}
+
+/*
+ * Truncate an inode to the minimum of the given size or the last populated
+ * block after any over size have been discarded. The kernel would allocate
+ * the last block in the file but fsck does not and neither do we. This
+ * code never extends files, only shrinks them.
+ */
+static void
+ino_trunc(ino_t ino, off_t size)
+{
+ union dinode *ip;
+ ufs2_daddr_t bn;
+ uint64_t totalfrags;
+ ufs_lbn_t nextlbn;
+ ufs_lbn_t lastlbn;
+ ufs_lbn_t tmpval;
+ ufs_lbn_t lbn;
+ ufs_lbn_t i;
+ int frags;
+ off_t cursize;
+ off_t off;
+ int mode;
+
+ ip = ino_read(ino);
+ mode = DIP(ip, di_mode) & IFMT;
+ cursize = DIP(ip, di_size);
+ if (debug)
+ printf("Truncating ino %d, mode %o to size %jd from size %jd\n",
+ ino, mode, size, cursize);
+
+ /* Skip datablocks for short links and devices. */
+ if (mode == 0 || mode == IFBLK || mode == IFCHR ||
+ (mode == IFLNK && cursize < fs->fs_maxsymlinklen))
+ return;
+ /* Don't extend. */
+ if (size > cursize)
+ size = cursize;
+ lastlbn = lblkno(fs, blkroundup(fs, size));
+ for (i = lastlbn; i < NDADDR; i++) {
+ if (DIP(ip, di_db[i]) == 0)
+ continue;
+ frags = sblksize(fs, cursize, i);
+ frags = numfrags(fs, frags);
+ blk_free(DIP(ip, di_db[i]), 0, frags);
+ DIP_SET(ip, di_db[i], 0);
+ }
+ /*
+ * Follow indirect blocks, freeing anything required.
+ */
+ for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++,
+ lbn = nextlbn) {
+ nextlbn = lbn + tmpval;
+ tmpval *= NINDIR(fs);
+ /* If we're not freeing any in this indirect range skip it. */
+ if (lastlbn >= nextlbn)
+ continue;
+ if (DIP(ip, di_ib[i]) == 0)
+ continue;
+ indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn);
+ /* If we freed everything in this indirect free the indir. */
+ if (lastlbn > lbn)
+ continue;
+ blk_free(DIP(ip, di_ib[i]), 0, frags);
+ DIP_SET(ip, di_ib[i], 0);
+ }
+ ino_dirty(ino);
+ /*
+ * Now that we've freed any whole blocks that exceed the desired
+ * truncation size, figure out how many blocks remain and what the
+ * last populated lbn is. We will set the size to this last lbn
+ * rather than worrying about allocating the final lbn as the kernel
+ * would've done. This is consistent with normal fsck behavior.
+ */
+ visitlbn = 0;
+ totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
+ if (size > lblktosize(fs, visitlbn + 1))
+ size = lblktosize(fs, visitlbn + 1);
+ /*
+ * If we're truncating direct blocks we have to adjust frags
+ * accordingly.
+ */
+ if (visitlbn < NDADDR) {
+ long oldspace, newspace;
+
+ bn = DIP(ip, di_db[visitlbn]);
+ oldspace = sblksize(fs, cursize, visitlbn);
+ newspace = sblksize(fs, size, visitlbn);
+ if (oldspace != newspace) {
+ bn += numfrags(fs, newspace);
+ frags = numfrags(fs, oldspace - newspace);
+ blk_free(bn, 0, frags);
+ totalfrags -= frags;
+ }
+ }
+ DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags));
+ DIP_SET(ip, di_size, size);
+ /*
+ * If we've truncated into the middle of a block or frag we have
+ * to zero it here. Otherwise the file could extend into
+ * uninitialized space later.
+ */
+ off = blkoff(fs, size);
+ if (off) {
+ uint8_t *buf;
+ long clrsize;
+
+ bn = ino_blkatoff(ip, ino, visitlbn, &frags);
+ if (bn == 0)
+ errx(1, "Block missing from ino %d at lbn %jd\n",
+ ino, visitlbn);
+ clrsize = frags * fs->fs_fsize;
+ buf = dblk_read(bn, clrsize);
+ clrsize -= off;
+ buf += off;
+ bzero(buf, clrsize);
+ dblk_dirty(bn);
+ }
+ return;
+}
/*
* Process records available for one inode and determine whether the
* link count is correct or needs adjusting.
- *
- * XXX Failed to fix zero length directory. Shouldn't .. have been mising?
*/
static void
ino_check(struct suj_ino *sino)
@@ -1228,6 +1499,15 @@ ino_check(struct suj_ino *sino)
int isat;
int mode;
+ /*
+ * Handle truncations that were not complete. We don't have
+ * to worry about truncating directory entries as they must have
+ * been removed for truncate to succeed.
+ */
+ if (sino->si_trunc) {
+ ino_trunc(ino, sino->si_trunc->jt_size);
+ sino->si_trunc = NULL;
+ }
if (sino->si_hasrecs == 0)
return;
ino = sino->si_ino;
@@ -1239,9 +1519,9 @@ ino_check(struct suj_ino *sino)
return;
rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec;
nlink = rrec->jr_nlink;
- newlinks = sino->si_nlinkadj;
+ newlinks = 0;
dotlinks = 0;
- removes = 0;
+ removes = sino->si_nlinkadj;
TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
rrec = (struct jrefrec *)srec->sr_rec;
isat = ino_isat(rrec->jr_parent, rrec->jr_diroff,
@@ -1286,7 +1566,7 @@ ino_check(struct suj_ino *sino)
if (rrec->jr_diroff == DOTDOT_OFFSET) {
stmp = ino_lookup(rrec->jr_parent, 0);
if (stmp)
- stmp->si_skipparent = 1;
+ ino_setskip(stmp, ino);
}
}
}
@@ -1304,6 +1584,7 @@ blk_check(struct suj_blk *sblk)
{
struct suj_rec *srec;
struct jblkrec *brec;
+ struct suj_ino *sino;
ufs2_daddr_t blk;
int mask;
int frags;
@@ -1318,6 +1599,10 @@ blk_check(struct suj_blk *sblk)
frags = brec->jb_frags;
blk = brec->jb_blkno + brec->jb_oldfrags;
isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags);
+ if (sino == NULL || sino->si_ino != brec->jb_ino) {
+ sino = ino_lookup(brec->jb_ino, 1);
+ sino->si_blkadj = 1;
+ }
if (debug)
printf("op %d blk %jd ino %d lbn %jd frags %d isat %d (%d)\n",
brec->jb_op, blk, brec->jb_ino, brec->jb_lbn,
@@ -1336,7 +1621,6 @@ blk_check(struct suj_blk *sblk)
blk += frags;
frags = brec->jb_frags - frags;
blk_free(blk, mask, frags);
- ino_adjblks(brec->jb_ino);
continue;
}
/*
@@ -1349,19 +1633,31 @@ blk_check(struct suj_blk *sblk)
*/
blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags,
brec->jb_op == JOP_FREEBLK);
- ino_adjblks(brec->jb_ino);
}
}
/*
+ * Walk the list of inode records for this cg and resolve moved and duplicate
+ * inode references now that we have a complete picture.
+ */
+static void
+cg_build(struct suj_cg *sc)
+{
+ struct suj_ino *sino;
+ int i;
+
+ for (i = 0; i < SUJ_HASHSIZE; i++)
+ LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
+ ino_build(sino);
+}
+
+/*
* Walk the list of inode and block records for this cg, recovering any
* changes which were not complete at the time of crash.
*/
static void
cg_check(struct suj_cg *sc)
{
- struct suj_blk *nextb;
- struct suj_ino *nexti;
struct suj_ino *sino;
struct suj_blk *sblk;
int i;
@@ -1370,32 +1666,43 @@ cg_check(struct suj_cg *sc)
printf("Recovering cg %d\n", sc->sc_cgx);
for (i = 0; i < SUJ_HASHSIZE; i++)
- LIST_FOREACH_SAFE(sino, &sc->sc_inohash[i], si_next, nexti)
+ LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
ino_check(sino);
for (i = 0; i < SUJ_HASHSIZE; i++)
- LIST_FOREACH_SAFE(sblk, &sc->sc_blkhash[i], sb_next, nextb)
+ LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next)
blk_check(sblk);
}
/*
- * Write a potentially dirty cg. All inodes must be written before the
- * cg maps are so that an allocated inode is never marked free, even if
- * we crash during fsck.
+ * Now that we've freed blocks which are not referenced we make a second
+ * pass over all inodes to adjust their block counts.
+ */
+static void
+cg_check2(struct suj_cg *sc)
+{
+ struct suj_ino *sino;
+ int i;
+
+ for (i = 0; i < SUJ_HASHSIZE; i++)
+ LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
+ if (sino->si_blkadj)
+ ino_adjblks(sino);
+}
+
+/*
+ * Write a potentially dirty cg. Recalculate the summary information and
+ * update the superblock summary.
*/
static void
cg_write(struct suj_cg *sc)
{
- struct ino_blk *iblk;
ufs1_daddr_t fragno, cgbno, maxbno;
u_int8_t *blksfree;
struct cg *cgp;
int blk;
int i;
- for (i = 0; i < SUJ_HASHSIZE; i++)
- LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next)
- iblk_write(iblk);
if (sc->sc_dirty == 0)
return;
/*
@@ -1437,6 +1744,21 @@ cg_write(struct suj_cg *sc)
err(1, "Unable to write cylinder group %d", sc->sc_cgx);
}
+/*
+ * Write out any modified inodes.
+ */
+static void
+cg_write_inos(struct suj_cg *sc)
+{
+ struct ino_blk *iblk;
+ int i;
+
+ for (i = 0; i < SUJ_HASHSIZE; i++)
+ LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next)
+ if (iblk->ib_dirty)
+ iblk_write(iblk);
+}
+
static void
cg_apply(void (*apply)(struct suj_cg *))
{
@@ -1473,7 +1795,7 @@ ino_unlinked(void)
if (debug)
printf("Freeing unlinked ino %d mode %o\n",
ino, mode);
- ino_truncate(ip, ino, mode);
+ ino_reclaim(ip, ino, mode);
} else if (debug)
printf("Skipping ino %d mode %o with link %d\n",
ino, mode, DIP(ip, di_nlink));
@@ -1482,6 +1804,29 @@ ino_unlinked(void)
}
/*
+ * Append a new record to the list of records requiring processing.
+ */
+static void
+ino_append(union jrec *rec)
+{
+ struct suj_ino *sino;
+ struct suj_rec *srec;
+
+ /*
+ * Lookup the ino and clear truncate if one is found. Partial
+ * truncates are always done synchronously so if we discover
+ * an operation that requires a lock the truncation has completed
+ * and can be discarded.
+ */
+ sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
+ sino->si_trunc = NULL;
+ sino->si_hasrecs = 1;
+ srec = errmalloc(sizeof(*srec));
+ srec->sr_rec = rec;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list