svn commit: r202991 - in projects/suj: 6/sbin/fsck_ffs 6/sys/ufs/ffs 7/sbin/fsck_ffs 7/sbin/fsdb 7/sys/ufs/ffs 8/sbin/fsck_ffs 8/sbin/fsdb 8/sys/ufs/ffs

Jeff Roberson jeff at FreeBSD.org
Mon Jan 25 23:30:53 UTC 2010


Author: jeff
Date: Mon Jan 25 23:30:53 2010
New Revision: 202991
URL: http://svn.freebsd.org/changeset/base/202991

Log:
   - Merge r202989 and r202990 from suj/head

Modified:
  projects/suj/6/sbin/fsck_ffs/fsck.h
  projects/suj/6/sbin/fsck_ffs/main.c
  projects/suj/6/sbin/fsck_ffs/suj.c
  projects/suj/6/sys/ufs/ffs/ffs_inode.c
  projects/suj/6/sys/ufs/ffs/ffs_softdep.c
  projects/suj/6/sys/ufs/ffs/ffs_vfsops.c
  projects/suj/6/sys/ufs/ffs/fs.h
  projects/suj/7/sbin/fsck_ffs/fsck.h
  projects/suj/7/sbin/fsck_ffs/main.c
  projects/suj/7/sbin/fsck_ffs/suj.c
  projects/suj/7/sbin/fsdb/fsdb.c
  projects/suj/7/sys/ufs/ffs/ffs_inode.c
  projects/suj/7/sys/ufs/ffs/ffs_softdep.c
  projects/suj/7/sys/ufs/ffs/ffs_vfsops.c
  projects/suj/7/sys/ufs/ffs/fs.h
  projects/suj/8/sbin/fsck_ffs/fsck.h
  projects/suj/8/sbin/fsck_ffs/main.c
  projects/suj/8/sbin/fsck_ffs/suj.c
  projects/suj/8/sbin/fsdb/fsdb.c
  projects/suj/8/sys/ufs/ffs/ffs_inode.c
  projects/suj/8/sys/ufs/ffs/ffs_softdep.c
  projects/suj/8/sys/ufs/ffs/ffs_vfsops.c
  projects/suj/8/sys/ufs/ffs/fs.h

Modified: projects/suj/6/sbin/fsck_ffs/fsck.h
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/fsck.h	Mon Jan 25 23:27:21 2010	(r202990)
+++ projects/suj/6/sbin/fsck_ffs/fsck.h	Mon Jan 25 23:30:53 2010	(r202991)
@@ -385,4 +385,4 @@ void		rwerror(const char *mesg, ufs2_dad
 void		sblock_init(void);
 void		setinodebuf(ino_t);
 int		setup(char *dev);
-void		suj_check(const char *filesys);
+int		suj_check(const char *filesys);

Modified: projects/suj/6/sbin/fsck_ffs/main.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/main.c	Mon Jan 25 23:27:21 2010	(r202990)
+++ projects/suj/6/sbin/fsck_ffs/main.c	Mon Jan 25 23:30:53 2010	(r202991)
@@ -229,8 +229,9 @@ checkfilesys(char *filesys)
 		if ((fsreadfd = open(filesys, O_RDONLY)) < 0 || readsb(0) == 0)
 			exit(3);	/* Cannot read superblock */
 		close(fsreadfd);
-		if (sblock.fs_flags & FS_NEEDSFSCK)
-			exit(4);	/* Earlier background failed */
+		/* Earlier background failed or journaled */
+		if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ))
+			exit(4);
 		if ((sblock.fs_flags & FS_DOSOFTDEP) == 0)
 			exit(5);	/* Not running soft updates */
 		size = MIBSIZE;
@@ -360,6 +361,23 @@ checkfilesys(char *filesys)
 		    sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize);
 		return (0);
 	}
+	/*
+	 * Determine if we can and should do journal recovery.
+	 */
+	if ((sblock.fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == FS_SUJ) {
+		if (preen || reply("USE JOURNAL?")) {
+			if (suj_check(filesys) == 0)
+				goto out;
+			/* suj_check failed, fall through. */
+		}
+		printf("** Skipping journal, falling through to full fsck\n");
+		/*
+		 * Write the superblock so we don't try to recover the
+		 * journal on another pass.
+		 */
+		sblock.fs_mtime = time(NULL);
+		sbdirty();
+	}
 	
 	/*
 	 * Cleared if any questions answered no. Used to decide if
@@ -454,7 +472,6 @@ checkfilesys(char *filesys)
 	inocleanup();
 	if (fsmodified) {
 		sblock.fs_time = time(NULL);
-		sblock.fs_mtime = time(NULL);
 		sbdirty();
 	}
 	if (cvtlevel && sblk.b_dirty) {
@@ -485,6 +502,7 @@ checkfilesys(char *filesys)
 		printf("\n***** FILE SYSTEM WAS MODIFIED *****\n");
 	if (rerun)
 		printf("\n***** PLEASE RERUN FSCK *****\n");
+out:
 	if (mntp != NULL) {
 		/*
 		 * We modified a mounted file system.  Do a mount update on

Modified: projects/suj/6/sbin/fsck_ffs/suj.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/suj.c	Mon Jan 25 23:27:21 2010	(r202990)
+++ projects/suj/6/sbin/fsck_ffs/suj.c	Mon Jan 25 23:30:53 2010	(r202991)
@@ -49,7 +49,8 @@ __FBSDID("$FreeBSD$");
 
 static void	ino_decr(ino_t);
 
-#define	SUJ_HASHSIZE	128
+#define	DOTDOT_OFFSET	DIRECTSIZ(1)
+#define	SUJ_HASHSIZE	2048
 #define	SUJ_HASHMASK	(SUJ_HASHSIZE - 1)
 #define	SUJ_HASH(x)	((x * 2654435761) & SUJ_HASHMASK)
 
@@ -68,7 +69,9 @@ TAILQ_HEAD(srechd, suj_rec);
 struct suj_ino {
 	LIST_ENTRY(suj_ino)	si_next;
 	struct srechd		si_recs;
+	struct srechd		si_newrecs;
 	struct srechd		si_movs;
+	struct jtrncrec		*si_trunc;
 	ino_t			si_ino;
 	int			si_nlinkadj;
 	int			si_skipparent;
@@ -90,6 +93,7 @@ struct data_blk {
 	uint8_t			*db_buf;
 	ufs2_daddr_t		db_blk;
 	int			db_size;
+	int			db_dirty;
 };
 
 struct ino_blk {
@@ -106,6 +110,8 @@ struct suj_cg {
 	struct inohd		sc_inohash[SUJ_HASHSIZE];
 	struct iblkhd		sc_iblkhash[SUJ_HASHSIZE];
 	struct ino_blk		*sc_lastiblk;
+	struct suj_ino		*sc_lastino;
+	struct suj_blk		*sc_lastblk;
 	uint8_t			*sc_cgbuf;
 	struct cg		*sc_cgp;
 	int			sc_dirty;
@@ -114,6 +120,8 @@ struct suj_cg {
 
 LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE];
 LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE];
+struct suj_cg *lastcg;
+struct data_blk *lastblk;
 
 TAILQ_HEAD(seghd, suj_seg) allsegs;
 uint64_t oldseq;
@@ -131,6 +139,8 @@ uint64_t jbytes;
 uint64_t jrecs;
 
 typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int);
+static void ino_trunc(ino_t ino, off_t size);
+static void ino_build(struct suj_ino *sino);
 
 static void *
 errmalloc(size_t n)
@@ -159,12 +169,6 @@ opendisk(const char *devnam)
 		    disk->d_error);
 	}
 	fs = &disk->d_fs;
-	/*
-	 * Setup a few things so reply() can work.
-	 */
-	bcopy(fs, &sblock, sizeof(sblock));
-	fsreadfd = disk->d_fd;
-	fswritefd = disk->d_fd;
 }
 
 /*
@@ -198,8 +202,6 @@ closedisk(const char *devnam)
 	free(disk);
 	disk = NULL;
 	fs = NULL;
-	fsreadfd = -1;
-	fswritefd = -1;
 }
 
 /*
@@ -216,10 +218,14 @@ cg_lookup(int cgx)
 		abort();
 		errx(1, "Bad cg number %d", cgx);
 	}
+	if (lastcg && lastcg->sc_cgx == cgx)
+		return (lastcg);
 	hd = &cghash[SUJ_HASH(cgx)];
 	LIST_FOREACH(sc, hd, sc_next)
-		if (sc->sc_cgx == cgx)
+		if (sc->sc_cgx == cgx) {
+			lastcg = sc;
 			return (sc);
+		}
 	sc = errmalloc(sizeof(*sc));
 	bzero(sc, sizeof(*sc));
 	sc->sc_cgbuf = errmalloc(fs->fs_bsize);
@@ -245,6 +251,8 @@ ino_lookup(ino_t ino, int creat)
 	struct suj_cg *sc;
 
 	sc = cg_lookup(ino_to_cg(fs, ino));
+	if (sc->sc_lastino && sc->sc_lastino->si_ino == ino)
+		return (sc->sc_lastino);
 	hd = &sc->sc_inohash[SUJ_HASH(ino)];
 	LIST_FOREACH(sino, hd, si_next)
 		if (sino->si_ino == ino)
@@ -256,6 +264,7 @@ ino_lookup(ino_t ino, int creat)
 	sino->si_ino = ino;
 	sino->si_nlinkadj = 0;
 	TAILQ_INIT(&sino->si_recs);
+	TAILQ_INIT(&sino->si_newrecs);
 	TAILQ_INIT(&sino->si_movs);
 	LIST_INSERT_HEAD(hd, sino, si_next);
 
@@ -274,7 +283,9 @@ blk_lookup(ufs2_daddr_t blk, int creat)
 	struct blkhd *hd;
 
 	sc = cg_lookup(dtog(fs, blk));
-	hd = &sc->sc_blkhash[SUJ_HASH(blk)];
+	if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk)
+		return (sc->sc_lastblk);
+	hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))];
 	LIST_FOREACH(sblk, hd, sb_next)
 		if (sblk->sb_blk == blk)
 			return (sblk);
@@ -289,16 +300,18 @@ blk_lookup(ufs2_daddr_t blk, int creat)
 	return (sblk);
 }
 
-static uint8_t *
-dblk_read(ufs2_daddr_t blk, int size)
+static struct data_blk *
+dblk_lookup(ufs2_daddr_t blk)
 {
 	struct data_blk *dblk;
 	struct dblkhd *hd;
 
-	hd = &dbhash[SUJ_HASH(blk)];
+	hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))];
+	if (lastblk && lastblk->db_blk == blk)
+		return (lastblk);
 	LIST_FOREACH(dblk, hd, db_next)
 		if (dblk->db_blk == blk)
-			goto found;
+			return (dblk);
 	/*
 	 * The inode block wasn't located, allocate a new one.
 	 */
@@ -306,7 +319,15 @@ dblk_read(ufs2_daddr_t blk, int size)
 	bzero(dblk, sizeof(*dblk));
 	LIST_INSERT_HEAD(hd, dblk, db_next);
 	dblk->db_blk = blk;
-found:
+	return (dblk);
+}
+
+static uint8_t *
+dblk_read(ufs2_daddr_t blk, int size)
+{
+	struct data_blk *dblk;
+
+	dblk = dblk_lookup(blk);
 	/*
 	 * I doubt size mismatches can happen in practice but it is trivial
 	 * to handle.
@@ -322,6 +343,33 @@ found:
 	return (dblk->db_buf);
 }
 
+static void
+dblk_dirty(ufs2_daddr_t blk)
+{
+	struct data_blk *dblk;
+
+	dblk = dblk_lookup(blk);
+	dblk->db_dirty = 1;
+}
+
+static void
+dblk_write(void)
+{
+	struct data_blk *dblk;
+	int i;
+
+	for (i = 0; i < SUJ_HASHSIZE; i++) {
+		LIST_FOREACH(dblk, &dbhash[i], db_next) {
+			if (dblk->db_dirty == 0 || dblk->db_size == 0)
+				continue;
+			if (bwrite(disk, fsbtodb(fs, dblk->db_blk),
+			    dblk->db_buf, dblk->db_size) == -1)
+				err(1, "Unable to write block %jd",
+				    dblk->db_blk);
+		}
+	}
+}
+
 static union dinode *
 ino_read(ino_t ino)
 {
@@ -333,7 +381,10 @@ ino_read(ino_t ino)
 
 	blk = ino_to_fsba(fs, ino);
 	sc = cg_lookup(ino_to_cg(fs, ino));
-	hd = &sc->sc_iblkhash[SUJ_HASH(blk)];
+	iblk = sc->sc_lastiblk;
+	if (iblk && iblk->ib_blk == blk)
+		goto found;
+	hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
 	LIST_FOREACH(iblk, hd, ib_next)
 		if (iblk->ib_blk == blk)
 			goto found;
@@ -371,7 +422,7 @@ ino_dirty(ino_t ino)
 		iblk->ib_dirty = 1;
 		return;
 	}
-	hd = &sc->sc_iblkhash[SUJ_HASH(blk)];
+	hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
 	LIST_FOREACH(iblk, hd, ib_next) {
 		if (iblk->ib_blk == blk) {
 			iblk->ib_dirty = 1;
@@ -612,22 +663,22 @@ blk_free(ufs2_daddr_t bno, int mask, int
  * to fetch a specific block.
  */
 static ufs2_daddr_t
-indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, int level)
+indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn)
 {
 	ufs2_daddr_t *bap2;
 	ufs2_daddr_t *bap1;
 	ufs_lbn_t lbnadd;
 	ufs_lbn_t base;
+	int level;
 	int i;
 
 	if (blk == 0)
 		return (0);
-	if (cur == lbn)
-		return (blk);
-	if (level == 0 && lbn < 0) {
-		abort();
+	level = lbn_level(cur);
+	if (level == -1)
+		errx(1, "Invalid indir lbn %jd", lbn);
+	if (level == 0 && lbn < 0)
 		errx(1, "Invalid lbn %jd", lbn);
-	}
 	bap2 = (void *)dblk_read(blk, fs->fs_bsize);
 	bap1 = (void *)bap2;
 	lbnadd = 1;
@@ -638,11 +689,9 @@ indir_blkatoff(ufs2_daddr_t blk, ino_t i
 		i = (lbn - base) / lbnadd;
 	else
 		i = (-lbn - base) / lbnadd;
-	if (i < 0 || i >= NINDIR(fs)) {
-		abort();
+	if (i < 0 || i >= NINDIR(fs))
 		errx(1, "Invalid indirect index %d produced by lbn %jd",
 		    i, lbn);
-	}
 	if (level == 0)
 		cur = base + (i * lbnadd);
 	else
@@ -657,7 +706,7 @@ indir_blkatoff(ufs2_daddr_t blk, ino_t i
 		abort();
 		errx(1, "Invalid lbn %jd at level 0", lbn);
 	}
-	return indir_blkatoff(blk, ino, cur, lbn, level - 1);
+	return indir_blkatoff(blk, ino, cur, lbn);
 }
 
 /*
@@ -685,14 +734,10 @@ ino_blkatoff(union dinode *ip, ino_t ino
 		return (ip->dp2.di_extb[lbn]);
 	}
 	/*
-	 * And now direct and indirect.  Verify that the lbn does not
-	 * exceed the size required to store the file by asking for
-	 * the lbn of the last byte.  These blocks should be 0 anyway
-	 * so this simply saves the traversal.
+	 * Now direct and indirect.
 	 */
-	if (lbn > 0 && lbn > lblkno(fs, DIP(ip, di_size) - 1))
-		return (0);
-	if (lbn < 0 && -lbn > lblkno(fs, DIP(ip, di_size) - 1))
+	if (DIP(ip, di_mode) == IFLNK &&
+	    DIP(ip, di_size) < fs->fs_maxsymlinklen)
 		return (0);
 	if (lbn >= 0 && lbn < NDADDR) {
 		*frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn));
@@ -703,7 +748,7 @@ ino_blkatoff(union dinode *ip, ino_t ino
 	for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++,
 	    tmpval *= NINDIR(fs), cur = next) {
 		next = cur + tmpval;
-		if (lbn == -cur)
+		if (lbn == -cur - i)
 			return (DIP(ip, di_ib[i]));
 		/*
 		 * Determine whether the lbn in question is within this tree.
@@ -712,8 +757,7 @@ ino_blkatoff(union dinode *ip, ino_t ino
 			continue;
 		if (lbn > 0 && lbn >= next)
 			continue;
-
-		return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn, i);
+		return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn);
 	}
 	errx(1, "lbn %jd not in ino", lbn);
 }
@@ -760,7 +804,10 @@ ino_isat(ino_t parent, off_t diroff, ino
 	*mode = DIP(dip, di_mode);
 	if ((*mode & IFMT) != IFDIR) {
 		if (debug) {
-			/* This can happen if the parent inode was reallocated. */
+			/*
+			 * This can happen if the parent inode
+			 * was reallocated.
+			 */
 			if (*mode != 0)
 				printf("Directory %d has bad mode %o\n",
 				    parent, *mode);
@@ -791,7 +838,7 @@ ino_isat(ino_t parent, off_t diroff, ino
 	 * certain we hit a valid record and not some junk in the middle
 	 * of a file name.  Stop when we reach or pass the expected offset.
 	 */
-	dpoff = 0;
+	dpoff = (doff / DIRBLKSIZ) * DIRBLKSIZ;
 	do {
 		dp = (struct direct *)&block[dpoff];
 		if (dpoff == doff)
@@ -801,7 +848,7 @@ ino_isat(ino_t parent, off_t diroff, ino
 		dpoff += dp->d_reclen;
 	} while (dpoff <= doff);
 	if (dpoff > fs->fs_bsize)
-		errx(1, "Corrupt directory block in dir inode %d", parent);
+		errx(1, "Corrupt directory block in dir ino %d", parent);
 	/* Not found. */
 	if (dpoff != doff) {
 		if (debug)
@@ -830,6 +877,7 @@ ino_isat(ino_t parent, off_t diroff, ino
 
 #define	VISIT_INDIR	0x0001
 #define	VISIT_EXT	0x0002
+#define	VISIT_ROOT	0x0004	/* Operation came via root & valid pointers. */
 
 /*
  * Read an indirect level which may or may not be linked into an inode.
@@ -854,16 +902,14 @@ indir_visit(ino_t ino, ufs_lbn_t lbn, uf
 	 */
 	if (blk == 0)
 		return;
-	if (blk_isindir(blk, ino, lbn) == 0) {
-		if (debug)
-			printf("blk %jd ino %d lbn %jd is not indir.\n",
-			    blk, ino, lbn);
-		goto out;
-	}
 	level = lbn_level(lbn);
-	if (level == -1) {
-		abort();
+	if (level == -1)
 		errx(1, "Invalid level for lbn %jd", lbn);
+	if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) {
+		if (debug)
+			printf("blk %jd ino %d lbn %jd(%d) is not indir.\n",
+			    blk, ino, lbn, level);
+		goto out;
 	}
 	lbnadd = 1;
 	for (i = level; i > 0; i--)
@@ -903,6 +949,7 @@ out:
 static uint64_t
 ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags)
 {
+	ufs_lbn_t nextlbn;
 	ufs_lbn_t tmpval;
 	ufs_lbn_t lbn;
 	uint64_t size;
@@ -937,8 +984,15 @@ ino_visit(union dinode *ip, ino_t ino, i
 		fragcnt += frags;
 		visitor(ino, i, DIP(ip, di_db[i]), frags);
 	}
+	/*
+	 * We know the following indirects are real as we're following
+	 * real pointers to them.
+	 */
+	flags |= VISIT_ROOT;
 	for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++,
-	    tmpval *= NINDIR(fs), lbn += tmpval) {
+	    lbn = nextlbn) {
+		nextlbn = lbn + tmpval;
+		tmpval *= NINDIR(fs);
 		if (DIP(ip, di_ib[i]) == 0)
 			continue;
 		indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor,
@@ -948,11 +1002,15 @@ ino_visit(union dinode *ip, ino_t ino, i
 }
 
 /*
- * Null visitor function used when we just want to count blocks.
+ * Null visitor function used when we just want to count blocks and
+ * record the lbn.
  */
+ufs_lbn_t visitlbn;
 static void
 null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
+	if (lbn > 0)
+		visitlbn = lbn;
 }
 
 /*
@@ -962,23 +1020,45 @@ null_visit(ino_t ino, ufs_lbn_t lbn, ufs
  * reachable at the time the inode was written.
  */
 static void
-ino_adjblks(ino_t ino)
+ino_adjblks(struct suj_ino *sino)
 {
-	struct suj_ino *sino;
 	union dinode *ip;
 	uint64_t blocks;
 	uint64_t frags;
+	off_t isize;
+	off_t size;
+	ino_t ino;
 
-	sino = ino_lookup(ino, 1);
-	if (sino->si_blkadj)
-		return;
-	sino->si_blkadj = 1;
+	ino = sino->si_ino;
 	ip = ino_read(ino);
 	/* No need to adjust zero'd inodes. */
 	if (DIP(ip, di_mode) == 0)
 		return;
+	/*
+	 * Visit all blocks and count them as well as recording the last
+	 * valid lbn in the file.  If the file size doesn't agree with the
+	 * last lbn we need to truncate to fix it.  Otherwise just adjust
+	 * the blocks count.
+	 */
+	visitlbn = 0;
 	frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
 	blocks = fsbtodb(fs, frags);
+	/*
+	 * We assume the size and direct block list is kept coherent by
+	 * softdep.  For files that have extended into indirects we truncate
+	 * to the size in the inode or the maximum size permitted by
+	 * populated indirects.
+	 */
+	if (visitlbn >= NDADDR) {
+		isize = DIP(ip, di_size);
+		size = lblktosize(fs, visitlbn + 1);
+		printf("ino %d isize %jd size %jd\n", ino, isize, size);
+		if (isize > size)
+			isize = size;
+		/* Always truncate to free any unpopulated indirects. */
+		ino_trunc(sino->si_ino, isize);
+		return;
+	}
 	if (blocks == DIP(ip, di_blocks))
 		return;
 	if (debug)
@@ -1021,6 +1101,16 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino
 }
 
 static void
+ino_setskip(struct suj_ino *sino, ino_t parent)
+{
+	int isdot;
+	int mode;
+
+	if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot))
+		sino->si_skipparent = 1;
+}
+
+static void
 ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	struct suj_ino *sino;
@@ -1053,7 +1143,7 @@ ino_free_children(ino_t ino, ufs_lbn_t l
 		if (isparent && skipparent == 1)
 			continue;
 		if (debug)
-			printf("Directory %d removing inode %d name %s\n",
+			printf("Directory %d removing ino %d name %s\n",
 			    ino, dp->d_ino, dp->d_name);
 		/*
 		 * Lookup this inode to see if we have a record for it.
@@ -1070,7 +1160,7 @@ ino_free_children(ino_t ino, ufs_lbn_t l
 		 * parent.  Don't try to adjust our link down again.
 		 */
 		if (isparent == 0)
-			sino->si_skipparent = 1;
+			ino_setskip(sino, ino);
 		/*
 		 * If we haven't yet processed this inode we need to make
 		 * sure we will successfully discover the lost path.  If not
@@ -1084,16 +1174,16 @@ ino_free_children(ino_t ino, ufs_lbn_t l
 				break;
 		}
 		if (srec == NULL)
-			sino->si_nlinkadj--;
+			sino->si_nlinkadj++;
 	}
 }
 
 /*
- * Truncate an inode, freeing all blocks and decrementing all children's
+ * Reclaim an inode, freeing all blocks and decrementing all children's
  * link counts.  Free the inode back to the cg.
  */
 static void
-ino_truncate(union dinode *ip, ino_t ino, int mode)
+ino_reclaim(union dinode *ip, ino_t ino, int mode)
 {
 	uint32_t gen;
 
@@ -1147,7 +1237,7 @@ ino_decr(ino_t ino)
 		if (debug)
 			printf("ino %d not enough links to live %d < %d\n",
 			    ino, nlink, reqlink);
-		ino_truncate(ip, ino, mode);
+		ino_reclaim(ip, ino, mode);
 		return;
 	}
 	DIP_SET(ip, di_nlink, nlink);
@@ -1192,7 +1282,7 @@ ino_adjust(ino_t ino, int lastmode, nlin
 		if (debug)
 			printf("ino %d not enough links to live %d < %d\n",
 			    ino, nlink, reqlink);
-		ino_truncate(ip, ino, mode);
+		ino_reclaim(ip, ino, mode);
 		return;
 	}
 	/* If required write the updated link count. */
@@ -1205,13 +1295,194 @@ ino_adjust(ino_t ino, int lastmode, nlin
 	ino_dirty(ino);
 }
 
-#define	DOTDOT_OFFSET	DIRECTSIZ(1)
+/*
+ * Truncate some or all blocks in an indirect, freeing any that are required
+ * and zeroing the indirect.
+ */
+static void
+indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn)
+{
+	ufs2_daddr_t *bap2;
+	ufs1_daddr_t *bap1;
+	ufs_lbn_t lbnadd;
+	ufs2_daddr_t nblk;
+	ufs_lbn_t next;
+	ufs_lbn_t nlbn;
+	int dirty;
+	int level;
+	int i;
+
+	if (blk == 0)
+		return;
+	dirty = 0;
+	level = lbn_level(lbn);
+	if (level == -1)
+		errx(1, "Invalid level for lbn %jd", lbn);
+	lbnadd = 1;
+	for (i = level; i > 0; i--)
+		lbnadd *= NINDIR(fs);
+	bap1 = (void *)dblk_read(blk, fs->fs_bsize);
+	bap2 = (void *)bap1;
+	for (i = 0; i < NINDIR(fs); i++) {
+		if (fs->fs_magic == FS_UFS1_MAGIC)
+			nblk = *bap1++;
+		else
+			nblk = *bap2++;
+		if (nblk == 0)
+			continue;
+		if (level != 0) {
+			nlbn = (lbn + 1) - (i * lbnadd);
+			/*
+			 * Calculate the lbn of the next indirect to
+			 * determine if any of this indirect must be
+			 * reclaimed.
+			 */
+			next = -(lbn + level) + ((i+1) * lbnadd);
+			if (next <= lastlbn)
+				continue;
+			indir_trunc(ino, nlbn, nblk, lastlbn);
+			/* If all of this indirect was reclaimed, free it. */
+			nlbn = next - lbnadd;
+			if (nlbn < lastlbn)
+				continue;
+		} else {
+			nlbn = -lbn + i * lbnadd;
+			if (nlbn < lastlbn)
+				continue;
+		}
+		dirty = 1;
+		blk_free(nblk, 0, fs->fs_frag);
+		if (fs->fs_magic == FS_UFS1_MAGIC)
+			*(bap1 - 1) = 0;
+		else
+			*(bap2 - 1) = 0;
+	}
+	if (dirty)
+		dblk_dirty(blk);
+}
+
+/*
+ * Truncate an inode to the smaller of the given size and the end of the
+ * last populated block left once blocks past that size have been discarded.
+ * The kernel would allocate the last block in the file but fsck does not and
+ * neither do we.  This code never extends files, only shrinks them.
+ */
+static void
+ino_trunc(ino_t ino, off_t size)
+{
+	union dinode *ip;
+	ufs2_daddr_t bn;
+	uint64_t totalfrags;
+	ufs_lbn_t nextlbn;
+	ufs_lbn_t lastlbn;
+	ufs_lbn_t tmpval;
+	ufs_lbn_t lbn;
+	ufs_lbn_t i;
+	int frags;
+	off_t cursize;
+	off_t off;
+	int mode;
+
+	ip = ino_read(ino);
+	mode = DIP(ip, di_mode) & IFMT;
+	cursize = DIP(ip, di_size);
+	if (debug)
+		printf("Truncating ino %d, mode %o to size %jd from size %jd\n",
+		    ino, mode, size, cursize);
+
+	/* Skip datablocks for short links and devices. */
+	if (mode == 0 || mode == IFBLK || mode == IFCHR ||
+	    (mode == IFLNK && cursize < fs->fs_maxsymlinklen))
+		return;
+	/* Don't extend. */
+	if (size > cursize)
+		size = cursize;
+	lastlbn = lblkno(fs, blkroundup(fs, size));
+	for (i = lastlbn; i < NDADDR; i++) {
+		if (DIP(ip, di_db[i]) == 0)
+			continue;
+		frags = sblksize(fs, cursize, i);
+		frags = numfrags(fs, frags);
+		blk_free(DIP(ip, di_db[i]), 0, frags);
+		DIP_SET(ip, di_db[i], 0);
+	}
+	/*
+	 * Follow indirect blocks, freeing anything required.
+	 */
+	for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++,
+	    lbn = nextlbn) {
+		nextlbn = lbn + tmpval;
+		tmpval *= NINDIR(fs);
+		/* If we're not freeing any in this indirect range skip it. */
+		if (lastlbn >= nextlbn)
+			continue;
+		if (DIP(ip, di_ib[i]) == 0)
+			continue;
+		indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn);
+		/* If we freed everything in this indirect free the indir. */
+		if (lastlbn > lbn)
+			continue;
+		blk_free(DIP(ip, di_ib[i]), 0, frags);
+		DIP_SET(ip, di_ib[i], 0);
+	}
+	ino_dirty(ino);
+	/*
+	 * Now that we've freed any whole blocks that exceed the desired
+	 * truncation size, figure out how many blocks remain and what the
+	 * last populated lbn is.  We will set the size to this last lbn
+	 * rather than worrying about allocating the final lbn as the kernel
+	 * would've done.  This is consistent with normal fsck behavior.
+	 */ 
+	visitlbn = 0;
+	totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
+	if (size > lblktosize(fs, visitlbn + 1))
+		size = lblktosize(fs, visitlbn + 1);
+	/*
+	 * If we're truncating direct blocks we have to adjust frags
+	 * accordingly.
+	 */
+	if (visitlbn < NDADDR) {
+		long oldspace, newspace;
+
+		bn = DIP(ip, di_db[visitlbn]);
+		oldspace = sblksize(fs, cursize, visitlbn);
+		newspace = sblksize(fs, size, visitlbn);
+		if (oldspace != newspace) {
+			bn += numfrags(fs, newspace);
+			frags = numfrags(fs, oldspace - newspace);
+			blk_free(bn, 0, frags);
+			totalfrags -= frags;
+		}
+	}
+	DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags));
+	DIP_SET(ip, di_size, size);
+	/*
+	 * If we've truncated into the middle of a block or frag we have
+	 * to zero it here.  Otherwise the file could extend into
+	 * uninitialized space later.
+	 */
+	off = blkoff(fs, size);
+	if (off) {
+		uint8_t *buf;
+		long clrsize;
+
+		bn = ino_blkatoff(ip, ino, visitlbn, &frags);
+		if (bn == 0)
+			errx(1, "Block missing from ino %d at lbn %jd\n",
+			    ino, visitlbn);
+		clrsize = frags * fs->fs_fsize;
+		buf = dblk_read(bn, clrsize);
+		clrsize -= off;
+		buf += off;
+		bzero(buf, clrsize);
+		dblk_dirty(bn);
+	}
+	return;
+}
 
 /*
  * Process records available for one inode and determine whether the
  * link count is correct or needs adjusting.
- *
- * XXX Failed to fix zero length directory.  Shouldn't .. have been mising?
  */
 static void
 ino_check(struct suj_ino *sino)
@@ -1228,6 +1499,15 @@ ino_check(struct suj_ino *sino)
 	int isat;
 	int mode;
 
+	/*
+	 * Handle truncations that were not complete.  We don't have
+	 * to worry about truncating directory entries as they must have
+	 * been removed for truncate to succeed.
+	 */
+	if (sino->si_trunc) {
+		ino_trunc(ino, sino->si_trunc->jt_size);
+		sino->si_trunc = NULL;
+	}
 	if (sino->si_hasrecs == 0)
 		return;
 	ino = sino->si_ino;
@@ -1239,9 +1519,9 @@ ino_check(struct suj_ino *sino)
 		return;
 	rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec;
 	nlink = rrec->jr_nlink;
-	newlinks = sino->si_nlinkadj;
+	newlinks = 0;
 	dotlinks = 0;
-	removes = 0;
+	removes = sino->si_nlinkadj;
 	TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 		rrec = (struct jrefrec *)srec->sr_rec;
 		isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, 
@@ -1286,7 +1566,7 @@ ino_check(struct suj_ino *sino)
 			if (rrec->jr_diroff == DOTDOT_OFFSET) {
 				stmp = ino_lookup(rrec->jr_parent, 0);
 				if (stmp)
-					stmp->si_skipparent = 1;
+					ino_setskip(stmp, ino);
 			}
 		}
 	}
@@ -1304,6 +1584,7 @@ blk_check(struct suj_blk *sblk)
 {
 	struct suj_rec *srec;
 	struct jblkrec *brec;
+	struct suj_ino *sino;
 	ufs2_daddr_t blk;
 	int mask;
 	int frags;
@@ -1318,6 +1599,10 @@ blk_check(struct suj_blk *sblk)
 		frags = brec->jb_frags;
 		blk = brec->jb_blkno + brec->jb_oldfrags;
 		isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags);
+		if (sino == NULL || sino->si_ino != brec->jb_ino) {
+			sino = ino_lookup(brec->jb_ino, 1);
+			sino->si_blkadj = 1;
+		}
 		if (debug)
 			printf("op %d blk %jd ino %d lbn %jd frags %d isat %d (%d)\n",
 			    brec->jb_op, blk, brec->jb_ino, brec->jb_lbn,
@@ -1336,7 +1621,6 @@ blk_check(struct suj_blk *sblk)
 			blk += frags;
 			frags = brec->jb_frags - frags;
 			blk_free(blk, mask, frags);
-			ino_adjblks(brec->jb_ino);
 			continue;
 		}
 		/*
@@ -1349,19 +1633,31 @@ blk_check(struct suj_blk *sblk)
 		 */
 		blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags,
 		    brec->jb_op == JOP_FREEBLK);
-		ino_adjblks(brec->jb_ino);
 	}
 }
 
 /*
+ * Walk the list of inode records for this cg and resolve moved and duplicate
+ * inode references now that we have a complete picture.
+ */
+static void
+cg_build(struct suj_cg *sc)
+{
+	struct suj_ino *sino;
+	int i;
+
+	for (i = 0; i < SUJ_HASHSIZE; i++)
+		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
+			ino_build(sino);
+}
+
+/*
  * Walk the list of inode and block records for this cg, recovering any
  * changes which were not complete at the time of crash.
  */
 static void
 cg_check(struct suj_cg *sc)
 {
-	struct suj_blk *nextb;
-	struct suj_ino *nexti;
 	struct suj_ino *sino;
 	struct suj_blk *sblk;
 	int i;
@@ -1370,32 +1666,43 @@ cg_check(struct suj_cg *sc)
 		printf("Recovering cg %d\n", sc->sc_cgx);
 
 	for (i = 0; i < SUJ_HASHSIZE; i++)
-		LIST_FOREACH_SAFE(sino, &sc->sc_inohash[i], si_next, nexti)
+		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
 			ino_check(sino);
 
 	for (i = 0; i < SUJ_HASHSIZE; i++)
-		LIST_FOREACH_SAFE(sblk, &sc->sc_blkhash[i], sb_next, nextb)
+		LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next)
 			blk_check(sblk);
 }
 
 /*
- * Write a potentially dirty cg.  All inodes must be written before the
- * cg maps are so that an allocated inode is never marked free, even if
- * we crash during fsck.
+ * Now that we've freed blocks which are not referenced we make a second
+ * pass over all inodes to adjust their block counts.
+ */
+static void
+cg_check2(struct suj_cg *sc)
+{
+	struct suj_ino *sino;
+	int i;
+
+	for (i = 0; i < SUJ_HASHSIZE; i++)
+		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
+			if (sino->si_blkadj)
+				ino_adjblks(sino);
+}
+
+/*
+ * Write a potentially dirty cg.  Recalculate the summary information and
+ * update the superblock summary.
  */
 static void
 cg_write(struct suj_cg *sc)
 {
-	struct ino_blk *iblk;
 	ufs1_daddr_t fragno, cgbno, maxbno;
 	u_int8_t *blksfree;
 	struct cg *cgp;
 	int blk;
 	int i;
 
-	for (i = 0; i < SUJ_HASHSIZE; i++)
-		LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next)
-			iblk_write(iblk);
 	if (sc->sc_dirty == 0)
 		return;
 	/*
@@ -1437,6 +1744,21 @@ cg_write(struct suj_cg *sc)
 		err(1, "Unable to write cylinder group %d", sc->sc_cgx);
 }
 
+/*
+ * Write out any modified inodes.
+ */
+static void
+cg_write_inos(struct suj_cg *sc)
+{
+	struct ino_blk *iblk;
+	int i;
+
+	for (i = 0; i < SUJ_HASHSIZE; i++)
+		LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next)
+			if (iblk->ib_dirty)
+				iblk_write(iblk);
+}
+
 static void
 cg_apply(void (*apply)(struct suj_cg *))
 {
@@ -1473,7 +1795,7 @@ ino_unlinked(void)
 			if (debug)
 				printf("Freeing unlinked ino %d mode %o\n",
 				    ino, mode);
-			ino_truncate(ip, ino, mode);
+			ino_reclaim(ip, ino, mode);
 		} else if (debug)
 			printf("Skipping ino %d mode %o with link %d\n",
 			    ino, mode, DIP(ip, di_nlink));
@@ -1482,6 +1804,29 @@ ino_unlinked(void)
 }
 
 /*
+ * Append a new record to the list of records requiring processing.
+ */
+static void
+ino_append(union jrec *rec)
+{
+	struct suj_ino *sino;
+	struct suj_rec *srec;
+
+	/*
+	 * Lookup the ino and clear truncate if one is found.  Partial
+	 * truncates are always done synchronously so if we discover
+	 * an operation that requires a lock the truncation has completed
+	 * and can be discarded.
+	 */
+	sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
+	sino->si_trunc = NULL;
+	sino->si_hasrecs = 1;
+	srec = errmalloc(sizeof(*srec));
+	srec->sr_rec = rec;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-projects mailing list