[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"
bugzilla-noreply at freebsd.org
bugzilla-noreply at freebsd.org
Thu Feb 23 21:59:31 UTC 2017
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138
--- Comment #7 from Mark Millard <markmi at dsl-only.net> ---
The following describes a reproducible sequence
in my context, unfortunately involving hours of
buildworld activity. It fails every time that I
have tried it and at the same places each time.
I give a contrast to a working context as well.
Context: doing buildworld buildkernel on a pine64+ with
2 GiBytes of RAM. Multiple head revisions, most recently:
# uname -apKU
FreeBSD pine64 12.0-CURRENT FreeBSD 12.0-CURRENT r313999M arm64 aarch64
1200021 1200021
The pine64 is running what was a cross build that had
MALLOC_PRODUCTION not defined. (Unlike my usual
way of building.)
Problem: sh core dumps via failing an assert. (script core dumps
as well for other reasons for one stage but I'm focused on the
earliest failures for now: sh failures.)
The following happens when I buildworld buildkernel on the
pine64+ using:
WITH_CLANG=
WITH_CLANG_IS_CC=
WITH_CLANG_FULL=
WITH_CLANG_EXTRAS=
WITH_LLD=
WITH_LLDB=
but not when using:
WITHOUT_CLANG=
WITHOUT_CLANG_IS_CC=
WITHOUT_CLANG_FULL=
WITHOUT_CLANG_EXTRAS=
WITHOUT_LLD=
WITHOUT_LLDB=
(The rest being the same, starting after using cleanworld
in both cases.)
But note that the first failures happen long after
those have built what they contribute to the _generic_libs
stage. (I have not yet tried isolating subsets.) Similarly
for the later 2nd stage: well after "everything" did its
llvm related activity.
I've tried the failing case under both:
2 GiBytes RAM + 3 GiBytes swap
and:
2 GiBytes RAM + 6 GiBytes swap
It made no difference and there have been no messages
about running out of swap space or other forms of resource
limitation based process killing or the like.
From sysutils/DTraceToolkit's /usr/local/share/dtrace-toolkit/execsnoop :
. . .
2017 Feb 22 16:37:02 0 61019 61018 make install DIRPRFX=lib/libusb/\0
2017 Feb 22 16:37:02 0 61020 61019 sh -e\0
2017 Feb 22 16:37:02 0 61021 61019 sh -e\0
2017 Feb 22 16:37:02 0 61022 61019 sh -e\0
2017 Feb 22 16:37:02 0 61023 61020 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 libusb.a
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02 0 61024 61021 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 /usr/src/lib/libusb/libusb-0.1.pc
/usr/src/lib/libusb/libusb-1.0.pc /usr/src/lib/libusb/libusb-2.0.pc
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/libdata/pkgconfig/\0
2017 Feb 22 16:37:02 0 61025 61022 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 /usr/src/lib/libusb/libusb20.h
/usr/src/lib/libusb/libusb20_desc.h /usr/src/lib/libusb/usb.h
/usr/src/lib/libusb/libusb.h
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/include/\0
2017 Feb 22 16:37:02 0 61023 61020 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 libusb.a
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02 0 61023 61020 install -p libusb.a
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02 0 61024 61021 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 /usr/src/lib/libusb/libusb-0.1.pc
/usr/src/lib/libusb/libusb-1.0.pc /usr/src/lib/libusb/libusb-2.0.pc
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/libdata/pkgconfig/\0
2017 Feb 22 16:37:02 0 61025 61022 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 /usr/src/lib/libusb/libusb20.h
/usr/src/lib/libusb/libusb20_desc.h /usr/src/lib/libusb/usb.h
/usr/src/lib/libusb/libusb.h
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/include/\0
2017 Feb 22 16:37:02 0 61024 61021 install -p
/usr/src/lib/libusb/libusb-0.1.pc /usr/src/lib/libusb/libusb-1.0.pc
/usr/src/lib/libusb/libusb-2.0.pc
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/libdata/pkgconfig/\0
2017 Feb 22 16:37:02 0 61025 61022 install -p
/usr/src/lib/libusb/libusb20.h /usr/src/lib/libusb/libusb20_desc.h
/usr/src/lib/libusb/usb.h /usr/src/lib/libusb/libusb.h
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/include/\0
2017 Feb 22 16:37:02 0 61026 61020 sh /usr/src/tools/install.sh -s -o
root -g wheel -m 444 libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02 0 61026 61020 sh /usr/src/tools/install.sh -s -o
root -g wheel -m 444 libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02 0 61026 61020 install -p libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02 0 61027 61020 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 libusb.so.3.debug
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/debug/usr/lib/\0
2017 Feb 22 16:37:02 0 61027 61020 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 libusb.so.3.debug
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/debug/usr/lib/\0
2017 Feb 22 16:37:02 0 61027 61020 install -p libusb.so.3.debug
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/debug/usr/lib/\0
2017 Feb 22 16:37:02 0 61028 61020 sh /usr/src/tools/install.sh -l rs
libusb.so.3 /usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/libusb.so\0
2017 Feb 22 16:37:02 0 61029 61028 ln -fsn libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/libusb.so\0
(That last was it for the build.) That is the end of the exec
activity for the _generic_libs part of the build (and since the
build stops: the last for the build overall).
(The below ps -daux output is from some time before the problem happened
but with later, related core files listed as well.)
root 91353 0.0 0.1 6856 1500 u0 I+ 10:28 0:00.02 `-- /bin/sh
/root/sys_build_scripts.pine64-host/make_pine64_nodebug_clang_bootstrap-pine64-host.sh
-j 4 buildworld buildkernel
root 91356 0.0 0.1 6204 1560 u0 S+ 10:28 0:06.59 `--
script
/root/sys_typescripts/typescript_make_pine64_nodebug_clang_bootstrap-pine64-host-2017-02-22:10:28:28
env __MAKE_CONF=/
-rw------- 1 root wheel 4657152 Feb 22 16:37:04 2017 script.91356.core
(from: ls -ltTU)
root 91357 0.0 0.0 4948 204 1 Ss+ 10:28 0:01.87 `--
make -j 4 buildworld buildkernel
root 91373 0.0 0.1 6856 1500 1 I 10:28 0:00.01 `--
sh -ev
-rw------- 1 root wheel 4702208 Feb 22 16:37:03 2017 sh.91373.core
(from: ls -ltTU)
root 91374 0.0 0.0 4948 204 1 S 10:28 0:01.69 `--
make -m /usr/src/share/mk -f Makefile.inc1 TARGET=arm64 TARGET_ARCH=aarch64
buildworld
root 10803 0.0 0.1 6856 1500 1 I 10:43 0:00.01
`-- sh -ev
-rw------- 1 root wheel 4702208 Feb 22 16:37:02 2017 sh.10803.core
(from: ls -ltTU)
root 10804 0.0 0.0 4948 200 1 S 10:43 3:00.18
`-- make -f Makefile.inc1
DESTDIR=/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp -DNO_FSCHG MK_HTML=no
-DNO_LINT MK_MA
root 10811 0.0 0.1 6856 1500 1 I 10:43 0:00.01
`-- sh -ev
root 38075 0.0 0.0 4948 204 1 S 11:14 0:00.75
`-- make -f Makefile.inc1 _generic_libs
root 38085 0.0 0.1 6856 1500 1 I 11:14 0:00.01
`-- sh -ev
. . .
"shutdown -r now" here makes no difference to the below
when done after the reboot.
(Of course there is some llvm related build activity during the
"everything" stage below.)
Doing another buildworld buildkernel to continue the build results
in:
. . .
2017 Feb 22 18:48:46 0 51772 51454 sh -e\0
2017 Feb 22 18:48:46 0 51773 51772 sed -E s,(^| |B|`)svn,\\1svnlite,g
/usr/src/contrib/subversion/subversion/svn/svn.1\0
2017 Feb 22 18:48:46 0 51774 51454 sh -e\0
2017 Feb 22 18:48:46 0 51775 51774 gzip -cn svnlite.1\0
2017 Feb 22 18:48:48 0 51776 51454 sh -e\0
2017 Feb 22 18:48:48 0 51777 51776 \0
2017 Feb 22 18:48:48 0 51778 51777 \0
2017 Feb 22 18:48:49 0 51779 51454 sh -e\0
2017 Feb 22 18:48:49 0 51780 51779 /usr/local/aarch64-freebsd/bin/objcopy
--only-keep-debug svnlite.full svnlite.debug\0
2017 Feb 22 18:48:50 0 51781 51454 sh -e\0
2017 Feb 22 18:48:50 0 51782 51781 /usr/local/aarch64-freebsd/bin/objcopy
--strip-debug --add-gnu-debuglink=svnlite.debug svnlite.full svnlite\0
The above is the end of the "everything" exec activity
but before the buildworld_epilogue (that does not happen).
Again it is the last exec activity for the build because the
build stops.
(Again ps -daux from sometime before the failure mixed
with core file ls -ltTU information below:)
root 61122 0.0 0.1 6856 1500 u0 I+ 17:13 0:00.01 `--
/bin/sh
/root/sys_build_scripts.pine64-host/make_pine64_nodebug_clang_bootstrap-pine64-host.sh
-j 4 buildworld buildkernel
root 61125 0.0 0.1 6204 1560 u0 S+ 17:13 0:09.56 `--
script
/root/sys_typescripts/typescript_make_pine64_nodebug_clang_bootstrap-pine64-host-2017-02-22:17:13:45
env __MAKE_CONF=
root 61126 0.0 0.0 4948 204 1 Ss+ 17:13 0:02.36 `--
make -j 4 buildworld buildkernel
root 61142 0.0 0.1 6856 1500 1 I 17:13 0:00.01 `--
sh -ev
-rw------- 1 root wheel 4702208 Feb 22 18:48:51 2017 sh.61142.core
root 61143 0.0 0.0 4948 204 1 S 17:13 0:02.08
`-- make -m /usr/src/share/mk -f Makefile.inc1 TARGET=arm64 TARGET_ARCH=aarch64
buildworld
root 81104 0.0 0.1 6856 1500 1 I 17:19 0:00.01
`-- sh -ev
-rw------- 1 root wheel 4702208 Feb 22 18:48:50 2017 sh.81104.core
root 81105 0.0 0.0 4948 220 1 S 17:19 0:02.57
`-- make -f Makefile.inc1
DESTDIR=/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp all
root 13358 0.0 0.1 6856 1500 1 I 17:49 0:00.01
|-- sh -e
. . .
(Yep: script does not core dump for this 2nd stage context.)
A 3rd buildworld buildkernel finishes the build, with buildworld
being essentially a large no-op and then doing the buildkernel.
Context details:
# more ~/src.configs/make.conf
CFLAGS.gcc+= -v
(But this was not a gcc based build.)
# more ~/src.configs/src.conf.pine64-clang-bootstrap.pine64-host
TO_TYPE=aarch64
TOOLS_TO_TYPE=${TO_TYPE}
#
KERNCONF=GENERIC-NODBG
TARGET=arm64
.if ${.MAKE.LEVEL} == 0
TARGET_ARCH=${TO_TYPE}
.export TARGET_ARCH
.endif
#
#WITH_CROSS_COMPILER=
WITH_SYSTEM_COMPILER=
#
#CPUTYPE=soft
WITH_LIBCPLUSPLUS=
WITHOUT_BINUTILS_BOOTSTRAP=
WITHOUT_ELFTOOLCHAIN_BOOTSTRAP=
#WITHOUT_CLANG_BOOTSTRAP=
WITH_CLANG=
WITH_CLANG_IS_CC=
WITH_CLANG_FULL=
WITH_CLANG_EXTRAS=
WITH_LLD=
WITH_LLDB=
#
WITH_BOOT=
WITHOUT_LIB32=
WITHOUT_LIBSOFT=
#
WITHOUT_GCC_BOOTSTRAP=
WITHOUT_GCC=
WITHOUT_GCC_IS_CC=
WITHOUT_GNUCXX=
#
NO_WERROR=
#WERROR=
MALLOC_PRODUCTION=
#
WITH_REPRODUCIBLE_BUILD=
WITH_DEBUG_FILES=
#
CROSS_BINUTILS_PREFIX=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/
AS=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/as
AR=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/ar
LD=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/ld
NM=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/nm
OBJCOPY=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/objcopy
OBJDUMP=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/objdump
RANLIB=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/ranlib
SIZE=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/size
STRINGS=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/strings
.export AS
.export AR
.export LD
.export NM
.export OBJCOPY
.export OBJDUMP
.export RANLIB
.export SIZE
.export STRINGS
# svnlite status /usr/src/ | sort
? /usr/src/sys/amd64/conf/GENERIC-DBG
? /usr/src/sys/amd64/conf/GENERIC-NODBG
? /usr/src/sys/arm/conf/BPIM3-DBG
? /usr/src/sys/arm/conf/BPIM3-NODBG
? /usr/src/sys/arm/conf/RPI2-DBG
? /usr/src/sys/arm/conf/RPI2-NODBG
? /usr/src/sys/arm64/conf/GENERIC-DBG
? /usr/src/sys/arm64/conf/GENERIC-NODBG
? /usr/src/sys/powerpc/conf/GENERIC64vtsc-DBG
? /usr/src/sys/powerpc/conf/GENERIC64vtsc-NODBG
? /usr/src/sys/powerpc/conf/GENERICvtsc-DBG
? /usr/src/sys/powerpc/conf/GENERICvtsc-NODBG
M /usr/src/bin/sh/jobs.c
M /usr/src/bin/sh/miscbltin.c
M /usr/src/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
M /usr/src/contrib/llvm/tools/lld/ELF/Target.cpp
M /usr/src/lib/csu/powerpc64/Makefile
M /usr/src/libexec/rtld-elf/Makefile
M /usr/src/sys/arm/arm/gic.c
M /usr/src/sys/boot/ofw/Makefile.inc
M /usr/src/sys/boot/powerpc/Makefile.inc
M /usr/src/sys/boot/powerpc/kboot/Makefile
M /usr/src/sys/boot/uboot/Makefile.inc
M /usr/src/sys/conf/Makefile.powerpc
M /usr/src/sys/conf/kmod.mk
M /usr/src/sys/ddb/db_main.c
M /usr/src/sys/ddb/db_script.c
M /usr/src/sys/powerpc/ofw/ofw_machdep.c
The . . ./conf/*-*DBG files include the standard files
and then make adjustments to have a production
style kernel build, including the arm64 case.
Below the first two files are as they were used to isolate
fork's original lack of preserving the sp value for the
child process side when interrupts happen. (Since fixed
in head but not in stable/11 last I looked.)
# svnlite diff /usr/src/bin/sh/jobs.c /usr/src/bin/sh/miscbltin.c
/usr/src/sys/arm/arm/gic.c
Index: /usr/src/bin/sh/jobs.c
===================================================================
--- /usr/src/bin/sh/jobs.c (revision 313999)
+++ /usr/src/bin/sh/jobs.c (working copy)
@@ -51,6 +51,9 @@
#include <stdlib.h>
#include <unistd.h>
+/* JUST FOR TESTING */
+#include <stdint.h>
+
#include "shell.h"
#if JOBS
#include <termios.h>
@@ -833,6 +836,13 @@
* in a pipeline).
*/
+extern uintptr_t example_stack_address(void);
+
+uintptr_t stack_address_before_fork = 0;
+uintptr_t stack_address_after_fork = 0;
+
+pid_t pid_from_fork = -1;
+
pid_t
forkshell(struct job *jp, union node *n, int mode)
{
@@ -845,7 +855,10 @@
if (mode == FORK_BG && (jp == NULL || jp->nprocs == 0))
checkzombies();
flushall();
- pid = fork();
+ stack_address_before_fork = example_stack_address();
+ pid_from_fork = pid = fork();
+ stack_address_after_fork = example_stack_address();
+ if (stack_address_after_fork != stack_address_before_fork) abort();
if (pid == -1) {
TRACE(("Fork failed, errno=%d\n", errno));
INTON;
@@ -946,7 +959,6 @@
return pid;
}
-
pid_t
vforkexecshell(struct job *jp, char **argv, char **envp, const char *path, int
idx, int pip[2])
{
Index: /usr/src/bin/sh/miscbltin.c
===================================================================
--- /usr/src/bin/sh/miscbltin.c (revision 313999)
+++ /usr/src/bin/sh/miscbltin.c (working copy)
@@ -64,6 +64,15 @@
#undef eflag
+
+/* JUST FOR TESTING */
+uintptr_t example_stack_address(void)
+{
+ volatile uintptr_t test = 0;
+ return (uintptr_t)(void*)&test;
+}
+
+
int readcmd(int, char **);
int umaskcmd(int, char **);
int ulimitcmd(int, char **);
Index: /usr/src/sys/arm/arm/gic.c
===================================================================
--- /usr/src/sys/arm/arm/gic.c (revision 313999)
+++ /usr/src/sys/arm/arm/gic.c (working copy)
@@ -672,9 +672,13 @@
if (irq >= sc->nirqs) {
#ifdef GIC_DEBUG_SPURIOUS
+#define EXPECTED_SPURIOUS_IRQ 1023
+ if (irq != EXPECTED_SPURIOUS_IRQ) {
device_printf(sc->gic_dev,
- "Spurious interrupt detected: last irq: %d on CPU%d\n",
+ "Spurious interrupt %d detected of %d: last irq: %d on
CPU%d\n",
+ irq, sc->nirqs,
sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
+ }
#endif
return (FILTER_HANDLED);
}
@@ -720,6 +724,16 @@
if (irq < sc->nirqs)
goto dispatch_irq;
+ if (irq != EXPECTED_SPURIOUS_IRQ) {
+#undef EXPECTED_SPURIOUS_IRQ
+#ifdef GIC_DEBUG_SPURIOUS
+ device_printf(sc->gic_dev,
+ "Spurious end interrupt %d detected of %d: last irq: %d on
CPU%d\n",
+ irq, sc->nirqs,
+ sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
+#endif
+ }
+
return (FILTER_HANDLED);
}
The gic.c change just avoids getting uninteresting spurious
interrupt messages on the console.
Other changes are generally tied to my powerpc64 and powerpc
investigations.
--
You are receiving this mail because:
You are on the CC list for the bug.
More information about the freebsd-amd64
mailing list