Re: git: 076002f24d35 - main - Do comprehensive UFS/FFS superblock integrity checks when reading a superblock.

From: Warner Losh <imp_at_bsdimp.com>
Date: Mon, 30 May 2022 22:07:27 UTC
David Woofskill also reported this...

Warner

On Mon, May 30, 2022, 3:56 PM Cy Schubert <Cy.Schubert@cschubert.com> wrote:

> In message <202205271922.24RJMOJ2039923@gitrepo.freebsd.org>, Kirk
> McKusick
> wri
> tes:
> > The branch main has been updated by mckusick:
> >
> > URL:
> https://cgit.FreeBSD.org/src/commit/?id=076002f24d35962f0d21f44bfddd34ee
> > 4d7f015d
> >
> > commit 076002f24d35962f0d21f44bfddd34ee4d7f015d
> > Author:     Kirk McKusick <mckusick@FreeBSD.org>
> > AuthorDate: 2022-05-27 19:21:11 +0000
> > Commit:     Kirk McKusick <mckusick@FreeBSD.org>
> > CommitDate: 2022-05-27 19:22:07 +0000
> >
> >     Do comprehensive UFS/FFS superblock integrity checks when reading a
> super
> > block.
> >
> >     Historically only minimal checks were made of a superblock when it
> >     was read in as it was assumed that fsck would have been run to
> >     correct any errors before attempting to use the filesystem. Recently
> >     several bug reports have been submitted reporting kernel panics
> >     that can be triggered by deliberately corrupting filesystem
> superblocks,
> >     see Bug 263979 - [meta] UFS / FFS / GEOM crash (panic) tracking
> >     which is tracking the reported corruption bugs.
> >
> >     This change upgrades the checks that are performed. These additional
> >     checks should prevent panics from a corrupted superblock. Although
> >     it appears in only one place, the new code will apply to the kernel
> >     modules and (through libufs) user applications that read in
> superblocks.
> >
> >     Reported by:  Robert Morris and Neeraj
> >     Reviewed by:  kib
> >     Tested by:    Peter Holm
> >     PR:           263979
> >     MFC after:    1 month
> >     Differential Revision: https://reviews.freebsd.org/D35219
> > ---
> >  sys/ufs/ffs/ffs_subr.c | 163
> +++++++++++++++++++++++++++++++++++++++++++----
> > --
> >  1 file changed, 146 insertions(+), 17 deletions(-)
> >
> > diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c
> > index 01e9f45e1205..28c2fee1cb37 100644
> > --- a/sys/ufs/ffs/ffs_subr.c
> > +++ b/sys/ufs/ffs/ffs_subr.c
> > @@ -35,6 +35,7 @@
> >  __FBSDID("$FreeBSD$");
> >
> >  #include <sys/param.h>
> > +#include <sys/limits.h>
> >
> >  #ifndef _KERNEL
> >  #include <stdio.h>
> > @@ -50,6 +51,7 @@ uint32_t ffs_calc_sbhash(struct fs *);
> >  struct malloc_type;
> >  #define UFS_MALLOC(size, type, flags) malloc(size)
> >  #define UFS_FREE(ptr, type) free(ptr)
> > +#define maxphys MAXPHYS
> >
> >  #else /* _KERNEL */
> >  #include <sys/systm.h>
> > @@ -125,6 +127,7 @@ ffs_update_dinode_ckhash(struct fs *fs, struct
> ufs2_dinod
> > e *dip)
> >  static off_t sblock_try[] = SBLOCKSEARCH;
> >  static int readsuper(void *, struct fs **, off_t, int, int,
> >       int (*)(void *, off_t, void **, int));
> > +static int validate_sblock(struct fs *, int);
> >
> >  /*
> >   * Read a superblock from the devfd device.
> > @@ -141,7 +144,7 @@ static int readsuper(void *, struct fs **, off_t,
> int, in
> > t,
> >   *     EIO: non-existent or truncated superblock.
> >   *     EIO: error reading summary information.
> >   *     ENOENT: no usable known superblock found.
> > - *     ENOSPC: failed to allocate space for the superblock.
> > + *     ENOMEM: failed to allocate space for the superblock.
> >   *     EINVAL: The previous newfs operation on this volume did not
> complete.
> >   *         The administrator must complete newfs before using this
> volume.
> >   */
> > @@ -152,7 +155,8 @@ ffs_sbget(void *devfd, struct fs **fsp, off_t
> altsblock,
> >  {
> >       struct fs *fs;
> >       struct fs_summary_info *fs_si;
> > -     int i, error, size, blks;
> > +     int i, error;
> > +     uint64_t size, blks;
> >       uint8_t *space;
> >       int32_t *lp;
> >       char *buf;
> > @@ -190,17 +194,16 @@ ffs_sbget(void *devfd, struct fs **fsp, off_t
> altsblock
> > ,
> >       if (fs->fs_contigsumsize > 0)
> >               size += fs->fs_ncg * sizeof(int32_t);
> >       size += fs->fs_ncg * sizeof(u_int8_t);
> > -     /* When running in libufs or libsa, UFS_MALLOC may fail */
> > -     if ((fs_si = UFS_MALLOC(sizeof(*fs_si), filltype, M_WAITOK)) ==
> NULL) {
> > +     if ((fs_si = UFS_MALLOC(sizeof(*fs_si), filltype, M_NOWAIT)) ==
> NULL) {
> >               UFS_FREE(fs, filltype);
> > -             return (ENOSPC);
> > +             return (ENOMEM);
> >       }
> >       bzero(fs_si, sizeof(*fs_si));
> >       fs->fs_si = fs_si;
> > -     if ((space = UFS_MALLOC(size, filltype, M_WAITOK)) == NULL) {
> > +     if ((space = UFS_MALLOC(size, filltype, M_NOWAIT)) == NULL) {
> >               UFS_FREE(fs->fs_si, filltype);
> >               UFS_FREE(fs, filltype);
> > -             return (ENOSPC);
> > +             return (ENOMEM);
> >       }
> >       fs->fs_csp = (struct csum *)space;
> >       for (i = 0; i < blks; i += fs->fs_frag) {
> > @@ -253,16 +256,8 @@ readsuper(void *devfd, struct fs **fsp, off_t
> sblockloc,
> >  int isaltsblk,
> >       fs = *fsp;
> >       if (fs->fs_magic == FS_BAD_MAGIC)
> >               return (EINVAL);
> > -     if (!(((fs->fs_magic == FS_UFS1_MAGIC && (isaltsblk ||
> > -           sblockloc <= SBLOCK_UFS1)) ||
> > -          (fs->fs_magic == FS_UFS2_MAGIC && (isaltsblk ||
> > -           sblockloc == fs->fs_sblockloc))) &&
> > -         fs->fs_ncg >= 1 &&
> > -         fs->fs_bsize >= MINBSIZE &&
> > -         fs->fs_bsize <= MAXBSIZE &&
> > -         fs->fs_bsize >= roundup(sizeof(struct fs), DEV_BSIZE) &&
> > -         fs->fs_sbsize <= SBLOCKSIZE))
> > -             return (ENOENT);
> > +     if ((error = validate_sblock(fs, isaltsblk)) != 0)
> > +             return (error);
> >       /*
> >        * If the filesystem has been run on a kernel without
> >        * metadata check hashes, disable them.
> > @@ -310,6 +305,140 @@ readsuper(void *devfd, struct fs **fsp, off_t
> sblockloc
> > , int isaltsblk,
> >       return (0);
> >  }
> >
> > +/*
> > + * Verify the filesystem values.
> > + */
> > +#define ILOG2(num) (fls(num) - 1)
> > +
> > +static int
> > +validate_sblock(struct fs *fs, int isaltsblk)
> > +{
> > +     int i, sectorsize;
> > +     u_int64_t maxfilesize, minfpg, sizepb;
> > +
> > +     sectorsize = dbtob(1);
> > +     if (fs->fs_magic == FS_UFS2_MAGIC) {
> > +             if ((!isaltsblk && (fs->fs_sblockloc != SBLOCK_UFS2 ||
> > +                 fs->fs_sblockactualloc != SBLOCK_UFS2)) ||
> > +                 fs->fs_maxsymlinklen != ((UFS_NDADDR + UFS_NIADDR) *
> > +                     sizeof(ufs2_daddr_t)) ||
> > +                 fs->fs_nindir != fs->fs_bsize / sizeof(ufs2_daddr_t) ||
> > +                 fs->fs_inopb != fs->fs_bsize / sizeof(struct
> ufs2_dinode))
> > +                     return (ENOENT);
> > +     } else if (fs->fs_magic == FS_UFS1_MAGIC) {
> > +             if ((!isaltsblk && (fs->fs_sblockloc > SBLOCK_UFS1 ||
> > +                 fs->fs_sblockactualloc != SBLOCK_UFS1)) ||
> > +                 fs->fs_nindir != fs->fs_bsize / sizeof(ufs1_daddr_t) ||
> > +                 fs->fs_inopb != fs->fs_bsize / sizeof(struct
> ufs1_dinode) |
> > |
> > +                 fs->fs_maxsymlinklen != ((UFS_NDADDR + UFS_NIADDR) *
> > +                     sizeof(ufs1_daddr_t)) ||
> > +                 fs->fs_old_inodefmt != FS_44INODEFMT ||
> > +                 fs->fs_old_cgoffset != 0 ||
> > +                 fs->fs_old_cgmask != 0xffffffff ||
> > +                 fs->fs_old_size != fs->fs_size ||
> > +                 fs->fs_old_rotdelay != 0 ||
> > +                 fs->fs_old_rps != 60 ||
> > +                 fs->fs_old_nspf != fs->fs_fsize / sectorsize ||
> > +                 fs->fs_old_cpg != 1 ||
> > +                 fs->fs_old_interleave != 1 ||
> > +                 fs->fs_old_trackskew != 0 ||
> > +                 fs->fs_old_cpc != 0 ||
> > +                 fs->fs_old_postblformat != 1 ||
> > +                 fs->fs_old_nrpos != 1 ||
> > +                 fs->fs_old_spc != fs->fs_fpg * fs->fs_old_nspf ||
> > +                 fs->fs_old_nsect != fs->fs_old_spc ||
> > +                 fs->fs_old_npsect != fs->fs_old_spc ||
> > +                 fs->fs_old_dsize != fs->fs_dsize ||
> > +                 fs->fs_old_ncyl != fs->fs_ncg)
> > +                     return (ENOENT);
> > +     } else {
> > +             return (ENOENT);
> > +     }
> > +     if (fs->fs_bsize < MINBSIZE || fs->fs_bsize > MAXBSIZE ||
> > +         fs->fs_bsize < roundup(sizeof(struct fs), DEV_BSIZE) ||
> > +         fs->fs_sbsize > SBLOCKSIZE || fs->fs_sbsize < fs->fs_fsize ||
> > +         !powerof2(fs->fs_bsize))
> > +             return (ENOENT);
> > +     if (fs->fs_fsize < sectorsize || fs->fs_fsize > fs->fs_bsize ||
> > +         fs->fs_fsize * MAXFRAG < fs->fs_bsize ||
> !powerof2(fs->fs_fsize))
> > +             return (ENOENT);
> > +     if (fs->fs_maxbsize < fs->fs_bsize || !powerof2(fs->fs_maxbsize) ||
> > +         fs->fs_maxbsize > FS_MAXCONTIG * fs->fs_bsize)
> > +             return (ENOENT);
> > +     if (fs->fs_bmask != ~(fs->fs_bsize - 1) ||
> > +         fs->fs_fmask != ~(fs->fs_fsize - 1) ||
> > +         fs->fs_qbmask != ~fs->fs_bmask ||
> > +         fs->fs_qfmask != ~fs->fs_fmask ||
> > +         fs->fs_bshift != ILOG2(fs->fs_bsize) ||
> > +         fs->fs_fshift != ILOG2(fs->fs_fsize) ||
> > +         fs->fs_frag != numfrags(fs, fs->fs_bsize) ||
> > +         fs->fs_fragshift != ILOG2(fs->fs_frag) ||
> > +         fs->fs_frag > MAXFRAG ||
> > +         fs->fs_fsbtodb != ILOG2(fs->fs_fsize / sectorsize))
> > +             return (ENOENT);
> > +     if (fs->fs_sblkno !=
> > +             roundup(howmany(fs->fs_sblockloc + SBLOCKSIZE,
> fs->fs_fsize),
> > +                 fs->fs_frag) ||
> > +         fs->fs_cblkno != fs->fs_sblkno +
> > +             roundup(howmany(SBLOCKSIZE, fs->fs_fsize), fs->fs_frag) ||
> > +         fs->fs_iblkno != fs->fs_cblkno + fs->fs_frag ||
> > +         fs->fs_dblkno != fs->fs_iblkno + fs->fs_ipg / INOPF(fs) ||
> > +         fs->fs_cgsize != fragroundup(fs, CGSIZE(fs)))
> > +             return (ENOENT);
> > +     if (fs->fs_csaddr != cgdmin(fs, 0) ||
> > +         fs->fs_cssize !=
> > +             fragroundup(fs, fs->fs_ncg * sizeof(struct csum)) ||
> > +         fs->fs_dsize != fs->fs_size - fs->fs_sblkno -
> > +             fs->fs_ncg * (fs->fs_dblkno - fs->fs_sblkno) -
> > +             howmany(fs->fs_cssize, fs->fs_fsize) ||
> > +         fs->fs_metaspace < 0 || fs->fs_metaspace > fs->fs_fpg / 2 ||
> > +         fs->fs_minfree > 99)
> > +             return (ENOENT);
> > +     maxfilesize = fs->fs_bsize * UFS_NDADDR - 1;
> > +     for (sizepb = fs->fs_bsize, i = 0; i < UFS_NIADDR; i++) {
> > +             sizepb *= NINDIR(fs);
> > +             maxfilesize += sizepb;
> > +     }
> > +     if (fs->fs_maxfilesize != maxfilesize)
> > +             return (ENOENT);
> > +     /*
> > +      * These values have a tight interaction with each other that
> > +      * makes it hard to tightly bound them. So we can only check
> > +      * that they are within a broader possible range.
> > +      *
> > +      * Calculate minfpg, the minimum number of fragments that can be
> > +      * in a cylinder group. The value 12289 is calculated in newfs(8)
> > +      * when creating the smallest block size UFS version 1 filesystem
> > +      * (4096 block size) with no fragments (4096 fragment size). That
> > +      * number may be depressed even further for very small filesystems
> > +      * since newfs(8) strives to have at least four cylinder groups.
> > +      */
> > +     minfpg = MIN(12289, fs->fs_size / 4);
> > +     if (fs->fs_ncg < 1 || fs->fs_ncg > (fs->fs_size / minfpg) + 1 ||
> > +         fs->fs_fpg < minfpg || fs->fs_fpg > fs->fs_size ||
> > +         fs->fs_ipg * fs->fs_ncg > (((int64_t)(1)) << 32) - INOPB(fs) ||
> > +         fs->fs_ipg > fs->fs_fpg || fs->fs_size < 8 * fs->fs_frag)
> > +             return (ENOENT);
> > +     if (fs->fs_size <= (fs->fs_ncg - 1) * fs->fs_fpg ||
> > +         fs->fs_size > fs->fs_ncg * fs->fs_fpg)
> > +             return (ENOENT);
> > +     /*
> > +      * Maxcontig sets the default for the maximum number of blocks
> > +      * that may be allocated sequentially. With file system clustering
> > +      * it is possible to allocate contiguous blocks up to the maximum
> > +      * transfer size permitted by the controller or buffering.
> > +      */
> > +     if (fs->fs_maxcontig < 1 ||
> > +         fs->fs_maxcontig > MAX(1, maxphys / fs->fs_bsize))
> > +             return (ENOENT);
> > +     if (fs->fs_maxcontig < 0 ||
> > +         (fs->fs_maxcontig == 0 && fs->fs_contigsumsize != 0) ||
> > +         (fs->fs_maxcontig > 1 &&
> > +         fs->fs_contigsumsize != MIN(fs->fs_maxcontig, FS_MAXCONTIG)))
> > +             return (ENOENT);
> > +     return (0);
> > +}
> > +
> >  /*
> >   * Write a superblock to the devfd device from the memory pointed to by
> fs.
> >   * Write out the superblock summary information if it is present.
> >
>
> Hi Kirk,
>
> This patch broke loader on one of my machines.
>
> I get the following error:
>
> LUA ERROR: Cannot open /boot/lua/loader.lua: no such file or directory"
> can't load kernel
>
> Of course booting from my rescue worked.
>
> Copying loader* from /boot on the rescue disk to /boot works around the
> problem.
>
> Backing up, newfs, and restoring the root filesystem, using my rescue disk
> also didn't work around the problem.
>
> Reverting this patch provided permanent relief.
>
> Loader could read the filesystems on my rescue disk, which had blocksizes
> of 16K but not those on my boot disk which had blocksizes of 32K.
>
> My three machines downstairs, all AMD gear with UFS blocksize of 32K, had
> no no problems with this whereas my laptop, an Intel, had the problem.
>
> To repeat, a newly created filesystem restored from backup using my rescue
> disk didn't resolve the problem.
>
>
> --
> Cheers,
> Cy Schubert <Cy.Schubert@komquats.com> or <Cy.Schubert@cschubert.com>
> FreeBSD UNIX:  <cy@FreeBSD.org>   Web:  http://www.FreeBSD.org
> NTP:           <cy@nwtime.org>    Web:  https://nwtime.org
>
>                         e**(i*pi)+1=0
>
>
>