Re: fsck segfaults on rpi3 running 13-stable (and on 14-CURRENT analyzing the same file system that resulted from the 13-STABLE crash)

From: Mark Millard <marklmi_at_yahoo.com>
Date: Sun, 12 Feb 2023 21:25:32 UTC
[With a backtrace for the fsck_ffs SIGSEGV crash and some
listing of code involved, I'm now including mckusick@FreeBSD.org <mailto:mckusick@FreeBSD.org>
in the To: . Kirk M. likely would like you to preserve the
problematical UFS file system that produces the fsck_ffs
crashes, at least for now. For Kirk M.: The below is from/for
the fsck_ffs attempted from 14-CURRENT.]

On Feb 12, 2023, at 11:53, bob prohaska <fbsd@www.zefox.net> wrote:

> On Sun, Feb 12, 2023 at 11:31:59AM -0800, Mark Millard wrote:
>> 
>> I'll note that another option is to run fsck_ffs from
>> lldb in the first place. 
> 
> That seems more productive, yielding:
> 
> root@www:~ # lldb /sbin/fsck_ffs
> (lldb) target create "/sbin/fsck_ffs"
> Current executable set to '/sbin/fsck_ffs' (aarch64).
> (lldb) run -fy
> Process 62596 launched: '/sbin/fsck_ffs' (aarch64)
> usage: fsck_ffs [-BCdEFfnpRrSyZ] [-b block] [-c level] [-m mode] filesystem ...
> Process 62596 exited with status = 1 (0x00000001) 
> (lldb) q
> root@www:~ # lldb fsck_ffs
> (lldb) target create "fsck_ffs"
> Current executable set to 'fsck_ffs' (aarch64).
> (lldb) run -fy /dev/da1s2d
> Process 62609 launched: '/sbin/fsck_ffs' (aarch64)
> ** /dev/da1s2d
> ** Last Mounted on /usr
> ** Phase 1 - Check Blocks and Sizes
> 7912408300994173476 BAD I=69393345
> 4313599915630302063 BAD I=69393345
> -4473632163892877928 BAD I=69393345
> 8068741989830080453 BAD I=69393345
> 3857159125896022134 BAD I=69393345
> -4354179704011695453 BAD I=69393345
> 7611175298055105740 BAD I=69393345
> 3985638883347136889 BAD I=69393345
> -2495754894521232470 BAD I=69393345
> 7739654885841380823 BAD I=69393345
> 7912408300994173476 BAD I=69393351
> 4313599915630302063 BAD I=69393351
> -4473632163892877928 BAD I=69393351
> 8068741989830080453 BAD I=69393351
> 3857159125896022134 BAD I=69393351
> -4354179704011695453 BAD I=69393351
> 7611175298055105740 BAD I=69393351
> 3985638883347136889 BAD I=69393351
> -2495754894521232470 BAD I=69393351
> 7739654885841380823 BAD I=69393351
> 7912408300994173476 BAD I=74682090
> 4313599915630302063 BAD I=74682090
> -4473632163892877928 BAD I=74682090
> 8068741989830080453 BAD I=74682090
> 3857159125896022134 BAD I=74682090
> -4354179704011695453 BAD I=74682090
> 7611175298055105740 BAD I=74682090
> 3985638883347136889 BAD I=74682090
> -2495754894521232470 BAD I=74682090
> 7739654885841380823 BAD I=74682090
> INODE CHECK-HASH FAILED I=74999808  OWNER=1842251117 MODE=15044
> This version of LLDB has no plugin for the language "assembler". Inspection of frame variables will be limited.
> Process 62609 stopped
> * thread #1, name = 'fsck_ffs', stop reason = signal SIGSEGV: invalid address (fault address: 0x0)
>    frame #0: 0x00005c6f47c3d550 libc.so.7`strnlen at strnlen.S:50
>   47   bic src, srcin, 15
>   48   mov wtmp, 0xf00f
>   49   cbz cntin, L(nomatch)
> -> 50   ld1 {vdata.16b}, [src], 16
>   51   dup vrepmask.8h, wtmp
>   52   cmeq vhas_chr.16b, vdata.16b, 0
>   53   lsl shift, srcin, 2
> (lldb) bt all
> * thread #1, name = 'fsck_ffs', stop reason = signal SIGSEGV: invalid address (fault address: 0x0)
>  * frame #0: 0x00005c6f47c3d550 libc.so.7`strnlen at strnlen.S:50
>    frame #1: 0x00005c6f47c08b48 libc.so.7`__vfprintf(fp=0x00005c6f47cd8b68, locale=0x00005c6f47cd84a8, fmt0="MTIME=%12.12s %4.4s ", ap=(__stack = 0x00005c6f45005c40, __gr_top = 0x00005c6f45005bd0, __vr_top = 0x00005c6f45005b90, __gr_offs = -48, __vr_offs = -128)) at vfprintf.c:854:25
>    frame #2: 0x00005c6f47c0752c libc.so.7`vfprintf_l(fp=0x00005c6f47cd8b68, locale=0x00005c6f47cd84a8, fmt0="MTIME=%12.12s %4.4s ", ap=(__stack = 0x00005c6f45005c40, __gr_top = 0x00005c6f45005bd0, __vr_top = 0x00005c6f45005b90, __gr_offs = -56, __vr_offs = -128)) at vfprintf.c:285:9
>    frame #3: 0x00005c6f47c09f94 libc.so.7`vfprintf(fp=<unavailable>, fmt0=<unavailable>, ap=<unavailable>) at vfprintf.c:292:9
>    frame #4: 0x00005c6f47c03dc0 libc.so.7`printf(fmt=<unavailable>) at printf.c:57:8
>    frame #5: 0x00005c6ec487edac fsck_ffs`prtinode(ip=<unavailable>) at inode.c:1314:2
>    frame #6: 0x00005c6ec487f000 fsck_ffs`getnextinode(inumber=74999808, rebuildcg=0) at inode.c:563:3
>    frame #7: 0x00005c6ec4882d5c fsck_ffs`pass1 [inlined] checkinode(inumber=74999808, idesc=0x00005c6f45005d20, rebuildcg=0) at pass1.c:254:12
>    frame #8: 0x00005c6ec4882d58 fsck_ffs`pass1 at pass1.c:181:8
>    frame #9: 0x00005c6ec488209c fsck_ffs`main [inlined] checkfilesys(filesys=<unavailable>) at main.c:446:2
>    frame #10: 0x00005c6ec48818b0 fsck_ffs`main(argc=1, argv=0x00005c6f45006138) at main.c:210:16
>    frame #11: 0x00005c6ec4877ec0 fsck_ffs`__start(argc=3, argv=0x00005c6f45006128, env=0x00005c6f45006148, cleanup=<unavailable>) at crt1_c.c:72:7
>    frame #12: 0x00007813def681d8 ld-elf.so.1`.rtld_start at rtld_start.S:41
> (lldb)  
> 
> Does that make any sense?

It gives some context for the internal failure, for sure.

I do not see a direct NULL pointer possibility in what I
report that I looked at below. It leaves me wondering if
something has trashed some memory (stack?) content that
is involved.

The backtrace indicates a NULL pointer was dereferenced:

* thread #1, name = 'fsck_ffs', stop reason = signal SIGSEGV: invalid address (fault address: 0x0)
 * frame #0: 0x00005c6f47c3d550 libc.so.7`strnlen at strnlen.S:50

The "-> 50   ld1 {vdata.16b}, [src], 16" for strnlen indicates that
the code is from (given where I have main's source):

/usr/main-src/contrib/arm-optimized-routines/string/aarch64/strnlen.S

ENTRY (__strnlen_aarch64)
        PTR_ARG (0)
        SIZE_ARG (1)
        bic     src, srcin, 15
        mov     wtmp, 0xf00f
        cbz     cntin, L(nomatch)
        ld1     {vdata.16b}, [src], 16
. . .

This is via the strnlen use in the
/usr/main-src/lib/libc/stdio/vfprintf.c code below (leading white
space might not be preserved):

. . .
int
__vfprintf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
{
. . .
                case 's':
                        if (flags & LONGINT) {
                                wchar_t *wcp;
                        
                                if (convbuf != NULL)
                                        free(convbuf);
                                if ((wcp = GETARG(wchar_t *)) == NULL)
                                        cp = "(null)";
                                else {
                                        convbuf = __wcsconv(wcp, prec);
                                        if (convbuf == NULL) {
                                                fp->_flags |= __SERR;
                                                goto error;
                                        }
                                        cp = convbuf;
                                }
                        } else if ((cp = GETARG(char *)) == NULL)
                                cp = "(null)";
                        size = (prec >= 0) ? strnlen(cp, prec) : strlen(cp);
                        sign = '\0';
                        break;
. . .

There are multiple layers involving va_list before we
get down to printf in printf.c . I've not tried to validate
this va_list related handling.

Looking back at code that is inside fsck_ffs source files that
leads to the printf usage (and indirectly to the other libc.so
code involved) . . .

So the code around /usr/main-src/sbin/fsck_ffs/inode.c:1314 looks
like: (leading white space might not be preserved)

void
prtinode(struct inode *ip)
{
        char *p;
        union dinode *dp;
        struct passwd *pw;
        time_t t;
          dp = ip->i_dp;
        printf(" I=%lu ", (u_long)ip->i_number);
        if (ip->i_number < UFS_ROOTINO || ip->i_number > maxino)
                return;
        printf(" OWNER=");
        if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
                printf("%s ", pw->pw_name);
        else
                printf("%u ", (unsigned)DIP(dp, di_uid));
        printf("MODE=%o\n", DIP(dp, di_mode));
        if (preen)
                printf("%s: ", cdevname);
        printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
        t = DIP(dp, di_mtime);
        p = ctime(&t);
        printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
}

That, in turned, was called via:
( /usr/main-src/sbin/fsck_ffs/inode.c )
. . .
union dinode *
getnextinode(ino_t inumber, int rebuildcg)
{
. . .
        dp = (union dinode *)nextinop;
        if (sblock.fs_magic == FS_UFS1_MAGIC)
                nextinop += sizeof(struct ufs1_dinode);
        else
                nextinop += sizeof(struct ufs2_dinode);
        if ((ckhashadd & CK_INODE) != 0) {
                ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
                dirty(&inobuf);
        }
        if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
                pwarn("INODE CHECK-HASH FAILED");
                ip.i_bp = NULL;
                ip.i_dp = dp;
                ip.i_number = inumber;
                prtinode(&ip);
                if (preen || reply("FIX") != 0) {
                        if (preen)
                                printf(" (FIXED)\n");
                        ffs_update_dinode_ckhash(&sblock,
                            (struct ufs2_dinode *)dp);
                        dirty(&inobuf);
                }
        }
. . .

In turn:
( /usr/main-src/sbin/fsck_ffs/pass1.c )

. . .
static int
checkinode(ino_t inumber, struct inodesc *idesc, int rebuildcg)
{
. . . 
        if ((dp = getnextinode(inumber, rebuildcg)) == NULL) {
                pfatal("INVALID INODE");
                goto unknown;
        }
. . .

In turn (same file):

void
pass1(void)
{
. . .
                /*
                 * Scan the allocated inodes.
                 */
                setinodebuf(c, inosused);
                for (i = 0; i < inosused; i++, inumber++) {
                        if (inumber < UFS_ROOTINO) {
                                (void)getnextinode(inumber, rebuildcg);
                                continue;
                        }
                        /*
                         * NULL return indicates probable end of allocated
                         * inodes during cylinder group rebuild attempt.
                         * We always keep trying until we get to the minimum
                         * valid number for this cylinder group.
                         */
                        if (checkinode(inumber, &idesc, rebuildcg) == 0 &&
                            i > cgp->cg_initediblk)
                                break;
                }
. . .

With that I stop.

So far, I've not identified how the NULL pointer showed up
that ended up being dereferenced. It does not look likely
that I will identify such.


===
Mark Millard
marklmi at yahoo.com