Re: confusion about root partition causes panic during startup

From: Mike Karels <mike_at_karels.net>
Date: Sun, 23 Jul 2023 02:43:39 UTC
Are you planning to commit the change to mountroot?

		Mike

On 20 Jul 2023, at 21:37, Mike Karels wrote:

> Mateusz Guzik <mjguzik@gmail.com> wrote:
>> On 7/20/23, Mike Karels <mike@karels.net> wrote:
>>> I installed an additional NVME drive on a system, and then booted.  It
>>> turns
>>> out that the new drive became nda0, renumbering the other drives.  The
>>> loader
>>> found the correct partition to boot (the only choice), and loaded the
>>> kernel
>>> correctly.  However, /etc/fstab still had the old name (nvd1p2), which is
>>> now drive 2.  I expected it to drop into single user, but instead the
>>> system
>>> panicked in vfs_mountroot_shuffle trying to switch root devices (see
>>> below).
>>> It doesn't seem that having the wrong root device in /etc/fstab should
>>> cause
>>> a panic; it makes it harder to patch the system.  I was unable to get the
>>> system to boot using boot-to-single-user or setting currdev, but I managed
>>> to remember doing "boot -a" from a loader prompt to get the system to ask
>>> the root device before mounting it.  I can easily reproduce this to test.
>>> Probably the NDFREE_PNBUF() shouldn't happen if namei() returned an error.
>>>
>
>> ye, this should do it (untested):
>
>> diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c
>> index 956d29e3f084..85398ff781e4 100644
>> --- a/sys/kern/vfs_mountroot.c
>> +++ b/sys/kern/vfs_mountroot.c
>> @@ -352,13 +352,13 @@ vfs_mountroot_shuffle(struct thread *td, struct
>> mount *mpdevfs)
>>                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath);
>>                 error = namei(&nd);
>>                 if (error) {
>> -                       NDFREE_PNBUF(&nd);
>>                         fspath = "/mnt";
>>                         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
>>                             fspath);
>>                         error = namei(&nd);
>>                 }
>>                 if (!error) {
>> +                       NDFREE_PNBUF(&nd);
>>                         vp = nd.ni_vp;
>>                         error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
>>                         if (!error)
>> @@ -376,7 +376,6 @@ vfs_mountroot_shuffle(struct thread *td, struct
>> mount *mpdevfs)
>>                         } else
>>                                 vput(vp);
>>                 }
>> -               NDFREE_PNBUF(&nd);
>
>>                 if (error)
>>                         printf("mountroot: unable to remount previous root "
>> @@ -387,6 +386,7 @@ vfs_mountroot_shuffle(struct thread *td, struct
>> mount *mpdevfs)
>>         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev");
>>         error = namei(&nd);
>>         if (!error) {
>> +               NDFREE_PNBUF(&nd);
>>                 vp = nd.ni_vp;
>>                 error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
>>                 if (!error)
>
> That was missing the last change, and tabs were expanded.  I put it in
> by hand, and the patch works, at least to avoid this panic.  It still
> insisted on remounting root on nda1p2, which is not a root file system.
> Remounting /dev still failed without panicking, then it panicked because
> there was no /sbin/init.  Apparently it is necessary to use "boot -a"
> in this situation.  Too bad the loader option menu doesn't include that.
>
> Just to be clear what I tested, my patch follows.
>
> 		Mike
>
> diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c
> index 956d29e3f084..b08b2a3200f8 100644
> --- a/sys/kern/vfs_mountroot.c
> +++ b/sys/kern/vfs_mountroot.c
> @@ -352,13 +352,13 @@ vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
>  		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath);
>  		error = namei(&nd);
>  		if (error) {
> -			NDFREE_PNBUF(&nd);
>  			fspath = "/mnt";
>  			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
>  			    fspath);
>  			error = namei(&nd);
>  		}
>  		if (!error) {
> +			NDFREE_PNBUF(&nd);
>  			vp = nd.ni_vp;
>  			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
>  			if (!error)
> @@ -376,7 +376,6 @@ vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
>  			} else
>  				vput(vp);
>  		}
> -		NDFREE_PNBUF(&nd);
>
>  		if (error)
>  			printf("mountroot: unable to remount previous root "
> @@ -387,6 +386,7 @@ vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
>  	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev");
>  	error = namei(&nd);
>  	if (!error) {
> +		NDFREE_PNBUF(&nd);
>  		vp = nd.ni_vp;
>  		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
>  		if (!error)
> @@ -413,7 +413,6 @@ vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
>  	if (error)
>  		printf("mountroot: unable to remount devfs under /dev "
>  		    "(error %d)\n", error);
> -	NDFREE_PNBUF(&nd);
>
>  	if (mporoot == mpdevfs) {
>  		vfs_unbusy(mpdevfs);