Re: git: 64ba1f4cf3a6 - main - rtld: Implement LD_SHOW_AUXV
- In reply to: Konstantin Belousov: "Re: git: 64ba1f4cf3a6 - main - rtld: Implement LD_SHOW_AUXV"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 13 Nov 2021 19:56:10 UTC
On 13 Nov 2021, at 19:39, Konstantin Belousov <kostikbel@gmail.com> wrote: > On Sat, Nov 13, 2021 at 07:10:54PM +0000, Jessica Clarke wrote: >> On 13 Nov 2021, at 19:09, Jessica Clarke <jrtc27@freebsd.org> wrote: >>> >>> On 13 Nov 2021, at 19:06, Konstantin Belousov <kostikbel@gmail.com> wrote: >>>> On Sat, Nov 13, 2021 at 08:59:00PM +0200, Konstantin Belousov wrote: >>>>> On Sat, Nov 13, 2021 at 06:29:24PM +0000, Jessica Clarke wrote: >>>>>> On 13 Nov 2021, at 17:57, Jessica Clarke <jrtc27@FreeBSD.org> wrote: >>>>>>> >>>>>>> On 13 Nov 2021, at 17:54, Jessica Clarke <jrtc27@FreeBSD.org> wrote: >>>>>>>> >>>>>>>> On 13 Nov 2021, at 17:33, Konstantin Belousov <kib@FreeBSD.org> wrote: >>>>>>>>> >>>>>>>>> The branch main has been updated by kib: >>>>>>>>> >>>>>>>>> URL: https://cgit.FreeBSD.org/src/commit/?id=64ba1f4cf3a6847a1dacf4bab0409d94898fa168 >>>>>>>>> >>>>>>>>> commit 64ba1f4cf3a6847a1dacf4bab0409d94898fa168 >>>>>>>>> Author: Konstantin Belousov <kib@FreeBSD.org> >>>>>>>>> AuthorDate: 2021-11-13 01:18:13 +0000 >>>>>>>>> Commit: Konstantin Belousov <kib@FreeBSD.org> >>>>>>>>> CommitDate: 2021-11-13 17:33:13 +0000 >>>>>>>>> >>>>>>>>> rtld: Implement LD_SHOW_AUXV >>>>>>>>> >>>>>>>>> It dumps auxv as seen by interpreter, right before starting any user >>>>>>>>> code. 
>>>>>>>>> >>>>>>>>> Copied from: glibc >>>>>>>>> Sponsored by: The FreeBSD Foundation >>>>>>>>> MFC after: 1 week >>>>>>>>> --- >>>>>>>>> libexec/rtld-elf/rtld.1 | 7 +++++- >>>>>>>>> libexec/rtld-elf/rtld.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ >>>>>>>>> 2 files changed, 73 insertions(+), 1 deletion(-) >>>>>>>>> >>>>>>>>> diff --git a/libexec/rtld-elf/rtld.1 b/libexec/rtld-elf/rtld.1 >>>>>>>>> index 187dc105667a..66aa2bdabd17 100644 >>>>>>>>> --- a/libexec/rtld-elf/rtld.1 >>>>>>>>> +++ b/libexec/rtld-elf/rtld.1 >>>>>>>>> @@ -28,7 +28,7 @@ >>>>>>>>> .\" >>>>>>>>> .\" $FreeBSD$ >>>>>>>>> .\" >>>>>>>>> -.Dd August 15, 2021 >>>>>>>>> +.Dd November 13, 2021 >>>>>>>>> .Dt RTLD 1 >>>>>>>>> .Os >>>>>>>>> .Sh NAME >>>>>>>>> @@ -309,6 +309,11 @@ will process the filtee dependencies of the loaded objects immediately, >>>>>>>>> instead of postponing it until required. >>>>>>>>> Normally, the filtees are opened at the time of the first symbol resolution >>>>>>>>> from the filter object. >>>>>>>>> +.It Ev LD_SHOW_AUXV >>>>>>>>> +If set, causes >>>>>>>>> +.Nm >>>>>>>>> +to dump content of the aux vector to standard output, before passing >>>>>>>>> +control to any user code. 
>>>>>>>>> .El >>>>>>>>> .Sh DIRECT EXECUTION MODE >>>>>>>>> .Nm >>>>>>>>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c >>>>>>>>> index c173c5a6e22e..0475134b0d96 100644 >>>>>>>>> --- a/libexec/rtld-elf/rtld.c >>>>>>>>> +++ b/libexec/rtld-elf/rtld.c >>>>>>>>> @@ -104,6 +104,7 @@ static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj, >>>>>>>>> static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int); >>>>>>>>> static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *); >>>>>>>>> static bool donelist_check(DoneList *, const Obj_Entry *); >>>>>>>>> +static void dump_auxv(Elf_Auxinfo **aux_info); >>>>>>>>> static void errmsg_restore(struct dlerror_save *); >>>>>>>>> static struct dlerror_save *errmsg_save(void); >>>>>>>>> static void *fill_search_info(const char *, size_t, void *); >>>>>>>>> @@ -364,6 +365,7 @@ enum { >>>>>>>>> LD_TRACE_LOADED_OBJECTS_FMT1, >>>>>>>>> LD_TRACE_LOADED_OBJECTS_FMT2, >>>>>>>>> LD_TRACE_LOADED_OBJECTS_ALL, >>>>>>>>> + LD_SHOW_AUXV, >>>>>>>>> }; >>>>>>>>> >>>>>>>>> struct ld_env_var_desc { >>>>>>>>> @@ -396,6 +398,7 @@ static struct ld_env_var_desc ld_env_vars[] = { >>>>>>>>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false), >>>>>>>>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false), >>>>>>>>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false), >>>>>>>>> + LD_ENV_DESC(SHOW_AUXV, false), >>>>>>>>> }; >>>>>>>>> >>>>>>>>> static const char * >>>>>>>>> @@ -857,6 +860,9 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) >>>>>>>>> if (rtld_verify_versions(&list_main) == -1 && !ld_tracing) >>>>>>>>> rtld_die(); >>>>>>>>> >>>>>>>>> + if (ld_get_env_var(LD_SHOW_AUXV) != NULL) >>>>>>>>> + dump_auxv(aux_info); >>>>>>>>> + >>>>>>>>> if (ld_tracing) { /* We're done */ >>>>>>>>> trace_loaded_objects(obj_main); >>>>>>>>> exit(0); >>>>>>>>> @@ -6058,6 +6064,67 @@ print_usage(const char *argv0) >>>>>>>>> " <args> Arguments to the executed process\n", argv0); >>>>>>>>> } 
>>>>>>>>> >>>>>>>>> +#define AUXFMT(at, xfmt) [at] = { .name = #at, .fmt = xfmt } >>>>>>>>> +static const struct auxfmt { >>>>>>>>> + const char *name; >>>>>>>>> + const char *fmt; >>>>>>>>> +} auxfmts[] = { >>>>>>>>> + AUXFMT(AT_NULL, NULL), >>>>>>>>> + AUXFMT(AT_IGNORE, NULL), >>>>>>>>> + AUXFMT(AT_EXECFD, "%d"), >>>>>>>>> + AUXFMT(AT_PHDR, "%p"), >>>>>>>>> + AUXFMT(AT_PHENT, "%u"), >>>>>>>>> + AUXFMT(AT_PHNUM, "%u"), >>>>>>>>> + AUXFMT(AT_PAGESZ, "%u"), >>>>>>>>> + AUXFMT(AT_BASE, "%#lx"), >>>>>>>>> + AUXFMT(AT_FLAGS, "%#lx"), >>>>>>>>> + AUXFMT(AT_ENTRY, "%p"), >>>>>>>>> + AUXFMT(AT_NOTELF, NULL), >>>>>>>>> + AUXFMT(AT_UID, "%d"), >>>>>>>>> + AUXFMT(AT_EUID, "%d"), >>>>>>>>> + AUXFMT(AT_GID, "%d"), >>>>>>>>> + AUXFMT(AT_EGID, "%d"), >>>>>>>>> + AUXFMT(AT_EXECPATH, "%s"), >>>>>>>>> + AUXFMT(AT_CANARY, "%p"), >>>>>>>>> + AUXFMT(AT_CANARYLEN, "%u"), >>>>>>>>> + AUXFMT(AT_OSRELDATE, "%u"), >>>>>>>>> + AUXFMT(AT_NCPUS, "%u"), >>>>>>>>> + AUXFMT(AT_PAGESIZES, "%p"), >>>>>>>>> + AUXFMT(AT_PAGESIZESLEN, "%u"), >>>>>>>>> + AUXFMT(AT_TIMEKEEP, "%p"), >>>>>>>>> + AUXFMT(AT_STACKPROT, "%#x"), >>>>>>>>> + AUXFMT(AT_EHDRFLAGS, "%#lx"), >>>>>>>>> + AUXFMT(AT_HWCAP, "%#lx"), >>>>>>>>> + AUXFMT(AT_HWCAP2, "%#lx"), >>>>>>>>> + AUXFMT(AT_BSDFLAGS, "%#lx"), >>>>>>>>> + AUXFMT(AT_ARGC, "%u"), >>>>>>>>> + AUXFMT(AT_ARGV, "%p"), >>>>>>>>> + AUXFMT(AT_ENVC, "%p"), >>>>>>>>> + AUXFMT(AT_ENVV, "%p"), >>>>>>>>> + AUXFMT(AT_PS_STRINGS, "%p"), >>>>>>>>> + AUXFMT(AT_FXRNG, "%p"), >>>>>>>>> +}; >>>>>>>>> + >>>>>>>>> +static void >>>>>>>>> +dump_auxv(Elf_Auxinfo **aux_info) >>>>>>>>> +{ >>>>>>>>> + Elf_Auxinfo *auxp; >>>>>>>>> + const struct auxfmt *fmt; >>>>>>>>> + int i; >>>>>>>>> + >>>>>>>>> + for (i = 0; i < AT_COUNT; i++) { >>>>>>>>> + auxp = aux_info[i]; >>>>>>>>> + if (auxp == NULL) >>>>>>>>> + continue; >>>>>>>>> + fmt = &auxfmts[i]; >>>>>>>>> + if (fmt->fmt == NULL) >>>>>>>>> + continue; >>>>>>>>> + rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name); >>>>>>>>> + 
rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr); >>>>>>>>> + rtld_fdprintf(STDOUT_FILENO, "\n"); >>>>>>>> >>>>>>>> This is undefined behaviour, breaks CHERI, and totally unnecessary. You >>>>>>>> have a handful of cases here, just make an enum and have separate >>>>>>>> rtld_fdprintf calls. >>>>>> >>>>>> In particular, ignoring CHERI, unsigned ints are sign-extended to 64 >>>>>> bits on MIPS and RISC-V. Thus by passing a 64-bit value but using a %u, >>>>>> you are violating the calling convention. I can’t currently get GCC or >>>>>> Clang to exploit the fact that varargs arguments are sign-extended, but >>>>>> on MIPS, and RISC-V GCC (Clang is currently stupid and round-trips via >>>>>> memory even when the va_arg calls have no branching surrounding them, >>>>>> rather than just grabbing from the register) there is a redundant >>>>>> sext.w that can legally be optimised out, but would be broken by this >>>>>> calling convention violation. >>>>> I might understand the argument that all non-pointer formats for auxv >>>>> should be longs, i.e. %lu/%ld/%lx, but this is the only problem I see >>>>> there. We do rely on having specific representations for addresses and >>>>> longs, and a low-level component as rtld has full rights to exercise >>>>> this fact, same as VM subsystem or memory allocators. >>>>> >>>>> In fact ELF spec exercises this as well. >>>>> Our arches are either ILP32 or LP64. >>>>> >>>>>> >>>>>> Then CHERI makes it worse because a_ptr and a_val do not have the same >>>>>> representation, although in practice I think passing a_ptr and nothing >>>>>> further does end up working on CHERI-RISC-V and Morello, just not >>>>>> CHERI-MIPS due to being big-endian. >>>> >>>> Ok, the following should be enough for CHERI, right? 
>>>> >>>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c >>>> index 0475134b0d96..cf467ae7aacd 100644 >>>> --- a/libexec/rtld-elf/rtld.c >>>> +++ b/libexec/rtld-elf/rtld.c >>>> @@ -6071,33 +6071,33 @@ static const struct auxfmt { >>>> } auxfmts[] = { >>>> AUXFMT(AT_NULL, NULL), >>>> AUXFMT(AT_IGNORE, NULL), >>>> - AUXFMT(AT_EXECFD, "%d"), >>>> + AUXFMT(AT_EXECFD, "%ld"), >>>> AUXFMT(AT_PHDR, "%p"), >>>> - AUXFMT(AT_PHENT, "%u"), >>>> - AUXFMT(AT_PHNUM, "%u"), >>>> - AUXFMT(AT_PAGESZ, "%u"), >>>> + AUXFMT(AT_PHENT, "%lu"), >>>> + AUXFMT(AT_PHNUM, "%lu"), >>>> + AUXFMT(AT_PAGESZ, "%lu"), >>>> AUXFMT(AT_BASE, "%#lx"), >>>> AUXFMT(AT_FLAGS, "%#lx"), >>>> AUXFMT(AT_ENTRY, "%p"), >>>> AUXFMT(AT_NOTELF, NULL), >>>> - AUXFMT(AT_UID, "%d"), >>>> - AUXFMT(AT_EUID, "%d"), >>>> - AUXFMT(AT_GID, "%d"), >>>> - AUXFMT(AT_EGID, "%d"), >>>> + AUXFMT(AT_UID, "%ld"), >>>> + AUXFMT(AT_EUID, "%ld"), >>>> + AUXFMT(AT_GID, "%ld"), >>>> + AUXFMT(AT_EGID, "%ld"), >>>> AUXFMT(AT_EXECPATH, "%s"), >>>> AUXFMT(AT_CANARY, "%p"), >>>> - AUXFMT(AT_CANARYLEN, "%u"), >>>> - AUXFMT(AT_OSRELDATE, "%u"), >>>> - AUXFMT(AT_NCPUS, "%u"), >>>> + AUXFMT(AT_CANARYLEN, "%lu"), >>>> + AUXFMT(AT_OSRELDATE, "%lu"), >>>> + AUXFMT(AT_NCPUS, "%lu"), >>>> AUXFMT(AT_PAGESIZES, "%p"), >>>> - AUXFMT(AT_PAGESIZESLEN, "%u"), >>>> + AUXFMT(AT_PAGESIZESLEN, "%lu"), >>>> AUXFMT(AT_TIMEKEEP, "%p"), >>>> - AUXFMT(AT_STACKPROT, "%#x"), >>>> + AUXFMT(AT_STACKPROT, "%#lx"), >>>> AUXFMT(AT_EHDRFLAGS, "%#lx"), >>>> AUXFMT(AT_HWCAP, "%#lx"), >>>> AUXFMT(AT_HWCAP2, "%#lx"), >>>> AUXFMT(AT_BSDFLAGS, "%#lx"), >>>> - AUXFMT(AT_ARGC, "%u"), >>>> + AUXFMT(AT_ARGC, "%lu"), >>>> AUXFMT(AT_ARGV, "%p"), >>>> AUXFMT(AT_ENVC, "%p"), >>>> AUXFMT(AT_ENVV, "%p"), >>>> @@ -6105,6 +6105,15 @@ static const struct auxfmt { >>>> AUXFMT(AT_FXRNG, "%p"), >>>> }; >>>> >>>> +static bool >>>> +is_ptr_fmt(const char *fmt) >>>> +{ >>>> + char last; >>>> + >>>> + last = fmt[strlen(fmt) - 1]; >>>> + return (last == 'p' || last == 's'); >>>> +} 
>>>> + >>>> static void >>>> dump_auxv(Elf_Auxinfo **aux_info) >>>> { >>>> @@ -6120,7 +6129,8 @@ dump_auxv(Elf_Auxinfo **aux_info) >>>> if (fmt->fmt == NULL) >>>> continue; >>>> rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name); >>>> - rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr); >>>> + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, is_ptr_fmt(fmt->fmt) ? >>>> + auxp->a_un.a_ptr : auxp->a_un.a_val); >>>> rtld_fdprintf(STDOUT_FILENO, "\n"); >>>> } >>>> } >>> >>> That should indeed work, though I’d argue it’s still not as nice as >>> avoiding rtld_fdprintfx entirely. >> >> Wait, no, it doesn’t, the ternary means both operands need to have the >> same type, so you end up implicitly casting the long to a pointer. You >> need a real if and two different rtld_fdprintfx calls (or just do it as >> I’ve suggested). > Ok, real if() then. > > diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c > index 0475134b0d96..d5c3d2893582 100644 > --- a/libexec/rtld-elf/rtld.c > +++ b/libexec/rtld-elf/rtld.c > @@ -6071,33 +6071,33 @@ static const struct auxfmt { > } auxfmts[] = { > AUXFMT(AT_NULL, NULL), > AUXFMT(AT_IGNORE, NULL), > - AUXFMT(AT_EXECFD, "%d"), > + AUXFMT(AT_EXECFD, "%ld"), > AUXFMT(AT_PHDR, "%p"), > - AUXFMT(AT_PHENT, "%u"), > - AUXFMT(AT_PHNUM, "%u"), > - AUXFMT(AT_PAGESZ, "%u"), > + AUXFMT(AT_PHENT, "%lu"), > + AUXFMT(AT_PHNUM, "%lu"), > + AUXFMT(AT_PAGESZ, "%lu"), > AUXFMT(AT_BASE, "%#lx"), > AUXFMT(AT_FLAGS, "%#lx"), > AUXFMT(AT_ENTRY, "%p"), > AUXFMT(AT_NOTELF, NULL), > - AUXFMT(AT_UID, "%d"), > - AUXFMT(AT_EUID, "%d"), > - AUXFMT(AT_GID, "%d"), > - AUXFMT(AT_EGID, "%d"), > + AUXFMT(AT_UID, "%ld"), > + AUXFMT(AT_EUID, "%ld"), > + AUXFMT(AT_GID, "%ld"), > + AUXFMT(AT_EGID, "%ld"), > AUXFMT(AT_EXECPATH, "%s"), > AUXFMT(AT_CANARY, "%p"), > - AUXFMT(AT_CANARYLEN, "%u"), > - AUXFMT(AT_OSRELDATE, "%u"), > - AUXFMT(AT_NCPUS, "%u"), > + AUXFMT(AT_CANARYLEN, "%lu"), > + AUXFMT(AT_OSRELDATE, "%lu"), > + AUXFMT(AT_NCPUS, "%lu"), > AUXFMT(AT_PAGESIZES, "%p"), > 
- AUXFMT(AT_PAGESIZESLEN, "%u"), > + AUXFMT(AT_PAGESIZESLEN, "%lu"), > AUXFMT(AT_TIMEKEEP, "%p"), > - AUXFMT(AT_STACKPROT, "%#x"), > + AUXFMT(AT_STACKPROT, "%#lx"), > AUXFMT(AT_EHDRFLAGS, "%#lx"), > AUXFMT(AT_HWCAP, "%#lx"), > AUXFMT(AT_HWCAP2, "%#lx"), > AUXFMT(AT_BSDFLAGS, "%#lx"), > - AUXFMT(AT_ARGC, "%u"), > + AUXFMT(AT_ARGC, "%lu"), > AUXFMT(AT_ARGV, "%p"), > AUXFMT(AT_ENVC, "%p"), > AUXFMT(AT_ENVV, "%p"), > @@ -6105,6 +6105,15 @@ static const struct auxfmt { > AUXFMT(AT_FXRNG, "%p"), > }; > > +static bool > +is_ptr_fmt(const char *fmt) > +{ > + char last; > + > + last = fmt[strlen(fmt) - 1]; > + return (last == 'p' || last == 's'); > +} > + > static void > dump_auxv(Elf_Auxinfo **aux_info) > { > @@ -6120,7 +6129,13 @@ dump_auxv(Elf_Auxinfo **aux_info) > if (fmt->fmt == NULL) > continue; > rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name); > - rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr); > + if (is_ptr_fmt(fmt->fmt)) { > + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, > + auxp->a_un.a_ptr); > + } else { > + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, > + auxp->a_un.a_val); > + } > rtld_fdprintf(STDOUT_FILENO, "\n"); > } > } I can’t think of a reason why that wouldn’t work, so consider this reviewed by me. Thanks, Jess