git: 5cc52631b3b8 - main - Rewrite the disk I/O management system in fsck_ffs(8). Other than making fsck_ffs(8) run faster, there should be no functional change.
Kirk McKusick
mckusick at FreeBSD.org
Thu Jan 7 22:58:45 UTC 2021
The branch main has been updated by mckusick:
URL: https://cgit.FreeBSD.org/src/commit/?id=5cc52631b3b88dfc36d8049dc8bece8573c5f9af
commit 5cc52631b3b88dfc36d8049dc8bece8573c5f9af
Author: Kirk McKusick <mckusick at FreeBSD.org>
AuthorDate: 2021-01-07 01:37:08 +0000
Commit: Kirk McKusick <mckusick at FreeBSD.org>
CommitDate: 2021-01-07 23:03:15 +0000
Rewrite the disk I/O management system in fsck_ffs(8). Other than
making fsck_ffs(8) run faster, there should be no functional change.
The original fsck_ffs(8) had its own disk I/O management system.
When gjournal(8) was added to FreeBSD 7, code was added to fsck_ffs(8)
to do the necessary gjournal rollback. Rather than use the existing
fsck_ffs(8) disk I/O system, it wrote its own from scratch. Similarly
when journalled soft updates were added in FreeBSD 9, code was added
to fsck_ffs(8) to do the necessary journal rollback. And once again,
rather than using either of the existing fsck_ffs(8) disk I/O
systems, it wrote its own from scratch. Lastly, the fsdb(8) utility
uses the fsck_ffs(8) disk I/O management system. In preparation for
making the changes necessary to enable snapshots to be taken when
using journalled soft updates, it was necessary to have a single
disk I/O system used by all the various subsystems in fsck_ffs(8).
This commit merges the functionality required by all the different
subsystems into a single disk I/O system that supports all of their
needs. In so doing it picks up optimizations from each of them
with the result that each of the subsystems does fewer reads and
writes than it did with its own customized I/O system. It also
greatly simplifies making changes to fsck_ffs(8) since everything
goes through a single place. For example, the ginode() function
fetches an inode from the disk. When inode check hashes were added,
they previously had to be checked in the code implementing inode
fetch in each of the three different disk I/O systems. Now they
need only be checked in ginode().
Tested by: Peter Holm
Sponsored by: Netflix
---
sbin/fsck_ffs/dir.c | 139 ++++---
sbin/fsck_ffs/ea.c | 1 +
sbin/fsck_ffs/fsck.h | 81 +++--
sbin/fsck_ffs/fsutil.c | 424 +++++++++++++++++-----
sbin/fsck_ffs/gjournal.c | 411 ++-------------------
sbin/fsck_ffs/globs.c | 9 +-
sbin/fsck_ffs/inode.c | 368 ++++++++++++++-----
sbin/fsck_ffs/main.c | 16 +-
sbin/fsck_ffs/pass1.c | 61 ++--
sbin/fsck_ffs/pass1b.c | 14 +-
sbin/fsck_ffs/pass2.c | 35 +-
sbin/fsck_ffs/pass3.c | 7 +-
sbin/fsck_ffs/pass4.c | 11 +-
sbin/fsck_ffs/setup.c | 18 +-
sbin/fsck_ffs/suj.c | 914 ++++++++++++++---------------------------------
sbin/fsdb/fsdb.c | 89 ++---
sbin/fsdb/fsdb.h | 1 +
17 files changed, 1215 insertions(+), 1384 deletions(-)
diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c
index a86d65a9f183..e88d1650ce5a 100644
--- a/sbin/fsck_ffs/dir.c
+++ b/sbin/fsck_ffs/dir.c
@@ -62,7 +62,7 @@ static struct dirtemplate dirhead = {
static int chgino(struct inodesc *);
static int dircheck(struct inodesc *, struct bufarea *, struct direct *);
-static int expanddir(union dinode *dp, char *name);
+static int expanddir(struct inode *ip, char *name);
static void freedir(ino_t ino, ino_t parent);
static struct direct *fsck_readdir(struct inodesc *);
static struct bufarea *getdirblk(ufs2_daddr_t blkno, long size);
@@ -126,6 +126,8 @@ dirscan(struct inodesc *idesc)
idesc->id_dirp = (struct direct *)dbuf;
if ((n = (*idesc->id_func)(idesc)) & ALTERED) {
bp = getdirblk(idesc->id_blkno, blksiz);
+ if (bp->b_errs != 0)
+ return (STOP);
memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf,
(size_t)dsize);
dirty(bp);
@@ -155,6 +157,8 @@ fsck_readdir(struct inodesc *idesc)
if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz)
return (NULL);
bp = getdirblk(idesc->id_blkno, blksiz);
+ if (bp->b_errs != 0)
+ return (NULL);
dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc);
/*
* Only need to check current entry if it is the first in the
@@ -330,6 +334,7 @@ direrror(ino_t ino, const char *errmesg)
void
fileerror(ino_t cwd, ino_t ino, const char *errmesg)
{
+ struct inode ip;
union dinode *dp;
char pathbuf[MAXPATHLEN + 1];
@@ -338,8 +343,9 @@ fileerror(ino_t cwd, ino_t ino, const char *errmesg)
pfatal("out-of-range inode number %ju", (uintmax_t)ino);
return;
}
- dp = ginode(ino);
- prtinode(ino, dp);
+ ginode(ino, &ip);
+ dp = ip.i_dp;
+ prtinode(&ip);
printf("\n");
getpathname(pathbuf, cwd, ino);
if (ftypeok(dp))
@@ -348,15 +354,18 @@ fileerror(ino_t cwd, ino_t ino, const char *errmesg)
pathbuf);
else
pfatal("NAME=%s\n", pathbuf);
+ irelse(&ip);
}
void
adjust(struct inodesc *idesc, int lcnt)
{
+ struct inode ip;
union dinode *dp;
int saveresolved;
- dp = ginode(idesc->id_number);
+ ginode(idesc->id_number, &ip);
+ dp = ip.i_dp;
if (DIP(dp, di_nlink) == lcnt) {
/*
* If we have not hit any unresolved problems, are running
@@ -365,6 +374,7 @@ adjust(struct inodesc *idesc, int lcnt)
*/
if (resolved && (preen || bkgrdflag) && usedsoftdep) {
clri(idesc, "UNREF", 1);
+ irelse(&ip);
return;
} else {
/*
@@ -377,19 +387,19 @@ adjust(struct inodesc *idesc, int lcnt)
if (linkup(idesc->id_number, (ino_t)0, NULL) == 0) {
resolved = saveresolved;
clri(idesc, "UNREF", 0);
+ irelse(&ip);
return;
}
/*
* Account for the new reference created by linkup().
*/
- dp = ginode(idesc->id_number);
lcnt--;
}
}
if (lcnt != 0) {
pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname :
((DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE"));
- prtinode(idesc->id_number, dp);
+ prtinode(&ip);
printf(" COUNT %d SHOULD BE %d",
DIP(dp, di_nlink), DIP(dp, di_nlink) - lcnt);
if (preen || usedsoftdep) {
@@ -403,7 +413,7 @@ adjust(struct inodesc *idesc, int lcnt)
if (preen || reply("ADJUST") == 1) {
if (bkgrdflag == 0) {
DIP_SET(dp, di_nlink, DIP(dp, di_nlink) - lcnt);
- inodirty(dp);
+ inodirty(&ip);
} else {
cmd.value = idesc->id_number;
cmd.size = -lcnt;
@@ -417,6 +427,7 @@ adjust(struct inodesc *idesc, int lcnt)
}
}
}
+ irelse(&ip);
}
static int
@@ -460,6 +471,7 @@ chgino(struct inodesc *idesc)
int
linkup(ino_t orphan, ino_t parentdir, char *name)
{
+ struct inode ip;
union dinode *dp;
int lostdir;
ino_t oldlfdir;
@@ -467,29 +479,32 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
char tempname[BUFSIZ];
memset(&idesc, 0, sizeof(struct inodesc));
- dp = ginode(orphan);
+ ginode(orphan, &ip);
+ dp = ip.i_dp;
lostdir = (DIP(dp, di_mode) & IFMT) == IFDIR;
pwarn("UNREF %s ", lostdir ? "DIR" : "FILE");
- prtinode(orphan, dp);
+ prtinode(&ip);
printf("\n");
- if (preen && DIP(dp, di_size) == 0)
+ if (preen && DIP(dp, di_size) == 0) {
+ irelse(&ip);
return (0);
+ }
+ irelse(&ip);
if (cursnapshot != 0) {
pfatal("FILE LINKUP IN SNAPSHOT");
return (0);
}
if (preen)
printf(" (RECONNECTED)\n");
- else
- if (reply("RECONNECT") == 0)
- return (0);
+ else if (reply("RECONNECT") == 0)
+ return (0);
if (lfdir == 0) {
- dp = ginode(UFS_ROOTINO);
+ ginode(UFS_ROOTINO, &ip);
idesc.id_name = strdup(lfname);
idesc.id_type = DATA;
idesc.id_func = findino;
idesc.id_number = UFS_ROOTINO;
- if ((ckinode(dp, &idesc) & FOUND) != 0) {
+ if ((ckinode(ip.i_dp, &idesc) & FOUND) != 0) {
lfdir = idesc.id_parent;
} else {
pwarn("NO lost+found DIRECTORY");
@@ -510,42 +525,52 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
}
}
}
+ irelse(&ip);
if (lfdir == 0) {
pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY");
printf("\n\n");
return (0);
}
}
- dp = ginode(lfdir);
+ ginode(lfdir, &ip);
+ dp = ip.i_dp;
if ((DIP(dp, di_mode) & IFMT) != IFDIR) {
pfatal("lost+found IS NOT A DIRECTORY");
- if (reply("REALLOCATE") == 0)
+ if (reply("REALLOCATE") == 0) {
+ irelse(&ip);
return (0);
+ }
oldlfdir = lfdir;
if ((lfdir = allocdir(UFS_ROOTINO, (ino_t)0, lfmode)) == 0) {
pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n");
+ irelse(&ip);
return (0);
}
if ((changeino(UFS_ROOTINO, lfname, lfdir) & ALTERED) == 0) {
pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n");
+ irelse(&ip);
return (0);
}
- inodirty(dp);
- idesc.id_type = ADDR;
+ idesc.id_type = inoinfo(oldlfdir)->ino_idtype;
idesc.id_func = freeblock;
idesc.id_number = oldlfdir;
adjust(&idesc, inoinfo(oldlfdir)->ino_linkcnt + 1);
inoinfo(oldlfdir)->ino_linkcnt = 0;
- dp = ginode(lfdir);
+ inodirty(&ip);
+ irelse(&ip);
+ ginode(lfdir, &ip);
+ dp = ip.i_dp;
}
if (inoinfo(lfdir)->ino_state != DFOUND) {
pfatal("SORRY. NO lost+found DIRECTORY\n\n");
+ irelse(&ip);
return (0);
}
(void)lftempname(tempname, orphan);
if (makeentry(lfdir, orphan, (name ? name : tempname)) == 0) {
pfatal("SORRY. NO SPACE IN lost+found DIRECTORY");
printf("\n\n");
+ irelse(&ip);
return (0);
}
inoinfo(orphan)->ino_linkcnt--;
@@ -553,9 +578,8 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 &&
parentdir != (ino_t)-1)
(void)makeentry(orphan, lfdir, "..");
- dp = ginode(lfdir);
DIP_SET(dp, di_nlink, DIP(dp, di_nlink) + 1);
- inodirty(dp);
+ inodirty(&ip);
inoinfo(lfdir)->ino_linkcnt++;
pwarn("DIR I=%lu CONNECTED. ", (u_long)orphan);
if (parentdir != (ino_t)-1) {
@@ -572,6 +596,7 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
if (preen == 0)
printf("\n");
}
+ irelse(&ip);
return (1);
}
@@ -582,6 +607,8 @@ int
changeino(ino_t dir, const char *name, ino_t newnum)
{
struct inodesc idesc;
+ struct inode ip;
+ int error;
memset(&idesc, 0, sizeof(struct inodesc));
idesc.id_type = DATA;
@@ -590,7 +617,10 @@ changeino(ino_t dir, const char *name, ino_t newnum)
idesc.id_fix = DONTKNOW;
idesc.id_name = strdup(name);
idesc.id_parent = newnum; /* new value for name */
- return (ckinode(ginode(dir), &idesc));
+ ginode(dir, &ip);
+ error = ckinode(ip.i_dp, &idesc);
+ irelse(&ip);
+ return (error);
}
/*
@@ -599,8 +629,10 @@ changeino(ino_t dir, const char *name, ino_t newnum)
int
makeentry(ino_t parent, ino_t ino, const char *name)
{
+ struct inode ip;
union dinode *dp;
struct inodesc idesc;
+ int retval;
char pathbuf[MAXPATHLEN + 1];
if (parent < UFS_ROOTINO || parent >= maxino ||
@@ -613,30 +645,37 @@ makeentry(ino_t parent, ino_t ino, const char *name)
idesc.id_parent = ino; /* this is the inode to enter */
idesc.id_fix = DONTKNOW;
idesc.id_name = strdup(name);
- dp = ginode(parent);
+ ginode(parent, &ip);
+ dp = ip.i_dp;
if (DIP(dp, di_size) % DIRBLKSIZ) {
DIP_SET(dp, di_size, roundup(DIP(dp, di_size), DIRBLKSIZ));
- inodirty(dp);
+ inodirty(&ip);
}
- if ((ckinode(dp, &idesc) & ALTERED) != 0)
+ if ((ckinode(dp, &idesc) & ALTERED) != 0) {
+ irelse(&ip);
return (1);
+ }
getpathname(pathbuf, parent, parent);
- dp = ginode(parent);
- if (expanddir(dp, pathbuf) == 0)
+ if (expanddir(&ip, pathbuf) == 0) {
+ irelse(&ip);
return (0);
- return (ckinode(dp, &idesc) & ALTERED);
+ }
+ retval = ckinode(dp, &idesc) & ALTERED;
+ irelse(&ip);
+ return (retval);
}
/*
* Attempt to expand the size of a directory
*/
static int
-expanddir(union dinode *dp, char *name)
+expanddir(struct inode *ip, char *name)
{
ufs2_daddr_t lastlbn, oldblk, newblk, indirblk;
size_t filesize, lastlbnsize;
struct bufarea *bp, *nbp;
struct inodesc idesc;
+ union dinode *dp;
int indiralloced;
char *cp;
@@ -645,6 +684,7 @@ expanddir(union dinode *dp, char *name)
pwarn("NO SPACE LEFT IN %s", name);
if (!preen && reply("EXPAND") == 0)
return (0);
+ dp = ip->i_dp;
filesize = DIP(dp, di_size);
lastlbn = lblkno(&sblock, filesize);
/*
@@ -671,7 +711,7 @@ expanddir(union dinode *dp, char *name)
DIP_SET(dp, di_size, filesize + sblock.fs_bsize - lastlbnsize);
DIP_SET(dp, di_blocks, DIP(dp, di_blocks) +
btodb(sblock.fs_bsize - lastlbnsize));
- inodirty(dp);
+ inodirty(ip);
memmove(nbp->b_un.b_buf, bp->b_un.b_buf, lastlbnsize);
memset(&nbp->b_un.b_buf[lastlbnsize], 0,
sblock.fs_bsize - lastlbnsize);
@@ -680,10 +720,12 @@ expanddir(union dinode *dp, char *name)
cp += DIRBLKSIZ)
memmove(cp, &emptydir, sizeof emptydir);
dirty(nbp);
- nbp->b_flags &= ~B_INUSE;
+ brelse(nbp);
idesc.id_blkno = oldblk;
idesc.id_numfrags = numfrags(&sblock, lastlbnsize);
(void)freeblock(&idesc);
+ if (preen)
+ printf(" (EXPANDED)\n");
return (1);
}
if ((newblk = allocblk(sblock.fs_frag)) == 0)
@@ -719,18 +761,18 @@ expanddir(union dinode *dp, char *name)
}
IBLK_SET(nbp, lastlbn - UFS_NDADDR, newblk);
dirty(nbp);
- nbp->b_flags &= ~B_INUSE;
+ brelse(nbp);
}
DIP_SET(dp, di_size, filesize + sblock.fs_bsize);
DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(sblock.fs_bsize));
- inodirty(dp);
+ inodirty(ip);
if (preen)
printf(" (EXPANDED)\n");
return (1);
bad:
pfatal(" (EXPANSION FAILED)\n");
if (nbp != NULL)
- nbp->b_flags &= ~B_INUSE;
+ brelse(nbp);
if (newblk != 0) {
idesc.id_blkno = newblk;
idesc.id_numfrags = sblock.fs_frag;
@@ -752,6 +794,7 @@ allocdir(ino_t parent, ino_t request, int mode)
{
ino_t ino;
char *cp;
+ struct inode ip;
union dinode *dp;
struct bufarea *bp;
struct inoinfo *inp;
@@ -761,10 +804,12 @@ allocdir(ino_t parent, ino_t request, int mode)
dirp = &dirhead;
dirp->dot_ino = ino;
dirp->dotdot_ino = parent;
- dp = ginode(ino);
+ ginode(ino, &ip);
+ dp = ip.i_dp;
bp = getdirblk(DIP(dp, di_db[0]), sblock.fs_fsize);
if (bp->b_errs) {
freeino(ino);
+ irelse(&ip);
return (0);
}
memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate));
@@ -774,14 +819,16 @@ allocdir(ino_t parent, ino_t request, int mode)
memmove(cp, &emptydir, sizeof emptydir);
dirty(bp);
DIP_SET(dp, di_nlink, 2);
- inodirty(dp);
+ inodirty(&ip);
if (ino == UFS_ROOTINO) {
inoinfo(ino)->ino_linkcnt = DIP(dp, di_nlink);
cacheino(dp, ino);
+ irelse(&ip);
return(ino);
}
if (!INO_IS_DVALID(parent)) {
freeino(ino);
+ irelse(&ip);
return (0);
}
cacheino(dp, ino);
@@ -793,9 +840,12 @@ allocdir(ino_t parent, ino_t request, int mode)
inoinfo(ino)->ino_linkcnt = DIP(dp, di_nlink);
inoinfo(parent)->ino_linkcnt++;
}
- dp = ginode(parent);
+ irelse(&ip);
+ ginode(parent, &ip);
+ dp = ip.i_dp;
DIP_SET(dp, di_nlink, DIP(dp, di_nlink) + 1);
- inodirty(dp);
+ inodirty(&ip);
+ irelse(&ip);
return (ino);
}
@@ -805,12 +855,15 @@ allocdir(ino_t parent, ino_t request, int mode)
static void
freedir(ino_t ino, ino_t parent)
{
+ struct inode ip;
union dinode *dp;
if (ino != parent) {
- dp = ginode(parent);
+ ginode(parent, &ip);
+ dp = ip.i_dp;
DIP_SET(dp, di_nlink, DIP(dp, di_nlink) - 1);
- inodirty(dp);
+ inodirty(&ip);
+ irelse(&ip);
}
freeino(ino);
}
@@ -847,8 +900,8 @@ static struct bufarea *
getdirblk(ufs2_daddr_t blkno, long size)
{
- if (pdirbp != NULL)
- pdirbp->b_flags &= ~B_INUSE;
+ if (pdirbp != NULL && pdirbp->b_errs == 0)
+ brelse(pdirbp);
pdirbp = getdatablk(blkno, size, BT_DIRDATA);
return (pdirbp);
}
diff --git a/sbin/fsck_ffs/ea.c b/sbin/fsck_ffs/ea.c
index 29e5f46d7651..7cf20196dfae 100644
--- a/sbin/fsck_ffs/ea.c
+++ b/sbin/fsck_ffs/ea.c
@@ -82,6 +82,7 @@ eascan(struct inodesc *idesc, struct ufs2_dinode *dp)
if ((n & 31) == 31)
printf("\n");
}
+ brelse(bp);
return (STOP);
#endif
}
diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h
index a471d1979438..676350b75767 100644
--- a/sbin/fsck_ffs/fsck.h
+++ b/sbin/fsck_ffs/fsck.h
@@ -73,8 +73,7 @@
#define MAXDUP 10 /* limit on dup blks (per inode) */
#define MAXBAD 10 /* limit on bad blks (per inode) */
-#define MINBUFS 10 /* minimum number of buffers required */
-#define MAXBUFS 40 /* maximum space to allocate to buffers */
+#define MINBUFS 100 /* minimum number of buffers required */
#define INOBUFSIZE 64*1024 /* size of buffer to read inodes in pass1 */
#define ZEROBUFSIZE (dev_bsize * 128) /* size of zero buffer used by -Z */
@@ -101,9 +100,10 @@ union dinode {
* have its link count adjusted by the value remaining in ino_linkcnt.
*/
struct inostat {
- char ino_state; /* state of inode, see below */
- char ino_type; /* type of inode */
- short ino_linkcnt; /* number of links not found */
+ u_char ino_state; /* state of inode, see below */
+ u_char ino_type:4; /* type of inode */
+ u_char ino_idtype:4; /* idesc id_type, SNAP or ADDR */
+ u_short ino_linkcnt; /* number of links not found */
};
/*
* Inode states.
@@ -132,16 +132,35 @@ extern struct inostatlist {
struct inostat *il_stat;/* inostat info for this cylinder group */
} *inostathead;
+/*
+ * Structure to reference a dinode.
+ */
+struct inode {
+ struct bufarea *i_bp; /* buffer containing the dinode */
+ union dinode *i_dp; /* pointer to dinode in buffer */
+ ino_t i_number; /* inode number */
+};
+
+/*
+ * Size of hash tables
+ */
+#define HASHSIZE 2048
+#define HASH(x) ((x * 2654435761) & (HASHSIZE - 1))
+
/*
* buffer cache structure.
*/
struct bufarea {
- TAILQ_ENTRY(bufarea) b_list; /* buffer list */
+ TAILQ_ENTRY(bufarea) b_list; /* LRU buffer queue */
+ LIST_ENTRY(bufarea) b_hash; /* hash list */
ufs2_daddr_t b_bno; /* disk block number */
int b_size; /* size of I/O */
int b_errs; /* I/O error */
int b_flags; /* B_ flags below */
int b_type; /* BT_ type below */
+ int b_refcnt; /* ref count of users */
+ int b_index; /* for BT_LEVEL, ptr index */
+ /* for BT_INODES, first inum */
union {
char *b_buf; /* buffer space */
ufs1_daddr_t *b_indir1; /* UFS1 indirect block */
@@ -151,7 +170,6 @@ struct bufarea {
struct ufs1_dinode *b_dinode1; /* UFS1 inode block */
struct ufs2_dinode *b_dinode2; /* UFS2 inode block */
} b_un;
- char b_dirty;
};
#define IBLK(bp, i) \
@@ -168,7 +186,7 @@ struct bufarea {
/*
* Buffer flags
*/
-#define B_INUSE 0x00000001 /* Buffer is in use */
+#define B_DIRTY 0x00000001 /* Buffer is dirty */
/*
* Type of data in buffer
*/
@@ -182,7 +200,8 @@ struct bufarea {
#define BT_INODES 7 /* Buffer holds inodes */
#define BT_DIRDATA 8 /* Buffer holds directory data */
#define BT_DATA 9 /* Buffer holds user data */
-#define BT_NUMBUFTYPES 10
+#define BT_EMPTY 10 /* Buffer allocated but not filled */
+#define BT_NUMBUFTYPES 11
#define BT_NAMES { \
"unknown", \
"Superblock", \
@@ -193,27 +212,33 @@ struct bufarea {
"External Attribute", \
"Inode Block", \
"Directory Contents", \
- "User Data" }
+ "User Data", \
+ "Allocated but not filled" }
+extern char *buftype[];
+#define BT_BUFTYPE(type) \
+ type < BT_NUMBUFTYPES ? buftype[type] : buftype[BT_UNKNOWN]
extern long readcnt[BT_NUMBUFTYPES];
extern long totalreadcnt[BT_NUMBUFTYPES];
extern struct timespec readtime[BT_NUMBUFTYPES];
extern struct timespec totalreadtime[BT_NUMBUFTYPES];
extern struct timespec startprog;
+extern struct bufarea *icachebp; /* inode cache buffer */
extern struct bufarea sblk; /* file system superblock */
extern struct bufarea *pdirbp; /* current directory contents */
-extern struct bufarea *pbp; /* current inode block */
+extern int sujrecovery; /* 1 => doing check using the journal */
#define dirty(bp) do { \
if (fswritefd < 0) \
pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n"); \
else \
- (bp)->b_dirty = 1; \
+ (bp)->b_flags |= B_DIRTY; \
} while (0)
#define initbarea(bp, type) do { \
- (bp)->b_dirty = 0; \
(bp)->b_bno = (ufs2_daddr_t)-1; \
(bp)->b_flags = 0; \
+ (bp)->b_refcnt = 0; \
+ (bp)->b_index = 0; \
(bp)->b_type = type; \
} while (0)
@@ -227,6 +252,8 @@ struct inodesc {
enum fixstate id_fix; /* policy on fixing errors */
int (*id_func)(struct inodesc *);
/* function to be applied to blocks of inode */
+ struct bufarea *id_bp; /* ckinode: buffer with indirect pointers */
+ union dinode *id_dp; /* ckinode: dinode being traversed */
ino_t id_number; /* inode number described */
ino_t id_parent; /* for DATA nodes, their parent */
ufs_lbn_t id_lbn; /* logical block number of current block */
@@ -239,7 +266,7 @@ struct inodesc {
int id_loc; /* for DATA nodes, current location in dir */
struct direct *id_dirp; /* for DATA nodes, ptr to current entry */
char *id_name; /* for DATA nodes, name to find or enter */
- char id_type; /* type of descriptor, DATA or ADDR */
+ char id_type; /* type of descriptor, DATA, ADDR, or SNAP */
};
/* file types */
#define DATA 1 /* a directory */
@@ -332,7 +359,6 @@ extern char skipclean; /* skip clean file systems if preening */
extern int fsmodified; /* 1 => write done to file system */
extern int fsreadfd; /* file descriptor for reading file system */
extern int fswritefd; /* file descriptor for writing file system */
-extern struct uufsd disk; /* libufs user-ufs disk structure */
extern int surrender; /* Give up if reads fail */
extern int wantrestart; /* Restart fsck on early termination */
@@ -352,12 +378,11 @@ extern volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */
#define clearinode(dp) \
if (sblock.fs_magic == FS_UFS1_MAGIC) { \
- (dp)->dp1 = ufs1_zino; \
+ (dp)->dp1 = zino.dp1; \
} else { \
- (dp)->dp2 = ufs2_zino; \
+ (dp)->dp2 = zino.dp2; \
}
-extern struct ufs1_dinode ufs1_zino;
-extern struct ufs2_dinode ufs2_zino;
+extern union dinode zino;
#define setbmap(blkno) setbit(blockmap, blkno)
#define testbmap(blkno) isset(blockmap, blkno)
@@ -408,6 +433,7 @@ struct fstab;
void adjust(struct inodesc *, int lcnt);
+void alarmhandler(int sig);
ufs2_daddr_t allocblk(long frags);
ino_t allocdir(ino_t parent, ino_t request, int mode);
ino_t allocino(ino_t request, int type);
@@ -418,12 +444,14 @@ void bufinit(void);
void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size);
void blerase(int fd, ufs2_daddr_t blk, long size);
void blzero(int fd, ufs2_daddr_t blk, long size);
+void brelse(struct bufarea *);
void cacheino(union dinode *dp, ino_t inumber);
void catch(int);
void catchquit(int);
void cgdirty(struct bufarea *);
+struct bufarea *cglookup(int cg);
int changeino(ino_t dir, const char *name, ino_t newnum);
-int check_cgmagic(int cg, struct bufarea *cgbp);
+int check_cgmagic(int cg, struct bufarea *cgbp, int requestrebuild);
int chkrange(ufs2_daddr_t blk, int cnt);
void ckfini(int markclean);
int ckinode(union dinode *dp, struct inodesc *);
@@ -444,16 +472,17 @@ void freeinodebuf(void);
void fsutilinit(void);
int ftypeok(union dinode *dp);
void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size);
-struct bufarea *cglookup(int cg);
struct bufarea *getdatablk(ufs2_daddr_t blkno, long size, int type);
struct inoinfo *getinoinfo(ino_t inumber);
union dinode *getnextinode(ino_t inumber, int rebuildcg);
void getpathname(char *namebuf, ino_t curdir, ino_t ino);
-union dinode *ginode(ino_t inumber);
+void ginode(ino_t, struct inode *);
void infohandler(int sig);
-void alarmhandler(int sig);
+void irelse(struct inode *);
+ufs2_daddr_t ino_blkatoff(union dinode *, ino_t, ufs_lbn_t, int *,
+ struct bufarea **);
void inocleanup(void);
-void inodirty(union dinode *);
+void inodirty(struct inode *);
struct inostat *inoinfo(ino_t inum);
void IOstats(char *what);
int linkup(ino_t orphan, ino_t parentdir, char *name);
@@ -468,13 +497,13 @@ void pass4(void);
void pass5(void);
void pfatal(const char *fmt, ...) __printflike(1, 2);
void propagate(void);
-void prtinode(ino_t ino, union dinode *dp);
+void prtinode(struct inode *);
void pwarn(const char *fmt, ...) __printflike(1, 2);
int readsb(int listerr);
int reply(const char *question);
void rwerror(const char *mesg, ufs2_daddr_t blk);
void sblock_init(void);
-void setinodebuf(ino_t);
+void setinodebuf(int, ino_t);
int setup(char *dev);
void gjournal_check(const char *filesys);
int suj_check(const char *filesys);
diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c
index 11d2ebd598fd..64c4701d9b7f 100644
--- a/sbin/fsck_ffs/fsutil.c
+++ b/sbin/fsck_ffs/fsutil.c
@@ -64,9 +64,14 @@ __FBSDID("$FreeBSD$");
#include "fsck.h"
+int sujrecovery = 0;
+
+static struct bufarea *allocbuf(const char *);
+static void cg_write(struct bufarea *);
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);
+static void prtbuf(const char *, struct bufarea *);
static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass;
@@ -74,12 +79,16 @@ struct timeval slowio_starttime;
int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
-static TAILQ_HEAD(buflist, bufarea) bufhead; /* head of buffer cache list */
+static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
+static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static int numbufs; /* size of buffer cache */
-static char *buftype[BT_NUMBUFTYPES] = BT_NAMES;
+static int cachelookups; /* number of cache lookups */
+static int cachereads; /* number of cache reads */
static struct bufarea *cgbufs; /* header for cylinder group cache */
static int flushtries; /* number of tries to reclaim memory */
+char *buftype[BT_NUMBUFTYPES] = BT_NAMES;
+
void
fsutilinit(void)
{
@@ -89,11 +98,6 @@ fsutilinit(void)
bzero(&slowio_starttime, sizeof(struct timeval));
slowio_delay_usec = 10000;
slowio_pollcnt = 0;
- bzero(&cgblk, sizeof(struct bufarea));
- TAILQ_INIT(&bufhead);
- numbufs = 0;
- /* buftype ? */
- cgbufs = NULL;
flushtries = 0;
}
@@ -181,33 +185,19 @@ inoinfo(ino_t inum)
void
bufinit(void)
{
- struct bufarea *bp;
- long bufcnt, i;
- char *bufp;
+ int i;
- pbp = pdirbp = (struct bufarea *)0;
- bufp = Malloc((unsigned int)sblock.fs_bsize);
- if (bufp == NULL)
- errx(EEXIT, "cannot allocate buffer pool");
- cgblk.b_un.b_buf = bufp;
+ pdirbp = (struct bufarea *)0;
+ bzero(&cgblk, sizeof(struct bufarea));
+ cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize);
+ if (cgblk.b_un.b_buf == NULL)
+ errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
initbarea(&cgblk, BT_CYLGRP);
- TAILQ_INIT(&bufhead);
- bufcnt = MAXBUFS;
- if (bufcnt < MINBUFS)
- bufcnt = MINBUFS;
- for (i = 0; i < bufcnt; i++) {
- bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
- bufp = Malloc((unsigned int)sblock.fs_bsize);
- if (bp == NULL || bufp == NULL) {
- if (i >= MINBUFS)
- break;
- errx(EEXIT, "cannot allocate buffer pool");
- }
- bp->b_un.b_buf = bufp;
- TAILQ_INSERT_HEAD(&bufhead, bp, b_list);
- initbarea(bp, BT_UNKNOWN);
- }
- numbufs = i; /* save number of buffers */
+ cgbufs = NULL;
+ numbufs = cachelookups = cachereads = 0;
+ TAILQ_INIT(&bufqueuehd);
+ for (i = 0; i < HASHSIZE; i++)
+ LIST_INIT(&bufhashhd[i]);
for (i = 0; i < BT_NUMBUFTYPES; i++) {
readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
@@ -215,6 +205,25 @@ bufinit(void)
}
}
+static struct bufarea *
+allocbuf(const char *failreason)
+{
+ struct bufarea *bp;
+ char *bufp;
+
+ bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
+ bufp = Malloc((unsigned int)sblock.fs_bsize);
+ if (bp == NULL || bufp == NULL) {
+ errx(EEXIT, "%s", failreason);
+ /* NOTREACHED */
+ }
+ numbufs++;
+ bp->b_un.b_buf = bufp;
+ TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
+ initbarea(bp, BT_UNKNOWN);
+ return (bp);
+}
+
/*
* Manage cylinder group buffers.
*
@@ -230,18 +239,22 @@ cglookup(int cg)
struct bufarea *cgbp;
struct cg *cgp;
+ if ((unsigned) cg >= sblock.fs_ncg)
+ errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
if (cgbufs == NULL) {
cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
if (cgbufs == NULL)
- errx(EEXIT, "cannot allocate cylinder group buffers");
+ errx(EEXIT, "Cannot allocate cylinder group buffers");
}
cgbp = &cgbufs[cg];
if (cgbp->b_un.b_cg != NULL)
return (cgbp);
cgp = NULL;
if (flushtries == 0)
- cgp = malloc((unsigned int)sblock.fs_cgsize);
+ cgp = Malloc((unsigned int)sblock.fs_cgsize);
if (cgp == NULL) {
+ if (sujrecovery)
+ errx(EEXIT,"Ran out of memory during journal recovery");
getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
return (&cgblk);
}
@@ -278,7 +291,7 @@ flushentry(void)
{
struct bufarea *cgbp;
- if (flushtries == sblock.fs_ncg || cgbufs == NULL)
+ if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
return (0);
cgbp = &cgbufs[flushtries++];
if (cgbp->b_un.b_cg == NULL)
@@ -296,25 +309,84 @@ struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
struct bufarea *bp;
+ struct bufhash *bhdp;
- TAILQ_FOREACH(bp, &bufhead, b_list)
- if (bp->b_bno == fsbtodb(&sblock, blkno))
+ cachelookups++;
+ /* If out of range, return empty buffer with b_errs == -1 */
+ if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
+ blkno = -1;
+ type = BT_EMPTY;
+ }
+ bhdp = &bufhashhd[HASH(blkno)];
+ LIST_FOREACH(bp, bhdp, b_hash)
+ if (bp->b_bno == fsbtodb(&sblock, blkno)) {
+ if (debug && bp->b_size != size) {
+ prtbuf("getdatablk: size mismatch", bp);
+ pfatal("getdatablk: b_size %d != size %ld\n",
+ bp->b_size, size);
+ }
goto foundit;
- TAILQ_FOREACH_REVERSE(bp, &bufhead, buflist, b_list)
- if ((bp->b_flags & B_INUSE) == 0)
- break;
- if (bp == NULL)
- errx(EEXIT, "deadlocked buffer pool");
+ }
+ /*
+ * Move a long-term busy buffer back to the front of the LRU so we
+ * do not endlessly inspect it for recycling.
+ */
+ bp = TAILQ_LAST(&bufqueuehd, bufqueue);
+ if (bp != NULL && bp->b_refcnt != 0) {
+ TAILQ_REMOVE(&bufqueuehd, bp, b_list);
+ TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
+ }
+ /*
+ * Allocate up to the minimum number of buffers before
+ * considering recycling any of them.
+ */
+ if (size > sblock.fs_bsize)
+ errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
+ sblock.fs_bsize);
+ if (numbufs < MINBUFS) {
+ bp = allocbuf("cannot create minimal buffer pool");
+ } else if (sujrecovery) {
+ /*
+ * SUJ recovery does not want anything written until it
+ * has successfully completed (so it can fail back to
+ * full fsck). Thus, we can only recycle clean buffers.
+ */
+ TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
+ if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
+ break;
+ if (bp == NULL)
+ bp = allocbuf("Ran out of memory during "
+ "journal recovery");
+ else
+ LIST_REMOVE(bp, b_hash);
+ } else {
+ /*
+ * Recycle oldest non-busy buffer.
+ */
+ TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
+ if (bp->b_refcnt == 0)
+ break;
*** 4026 LINES SKIPPED ***
More information about the dev-commits-src-all
mailing list