[Bug 280318] fork() can deadlock on rtld_phdr_lock

From: <bugzilla-noreply_at_freebsd.org>
Date: Tue, 16 Jul 2024 20:43:51 UTC
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=280318

            Bug ID: 280318
           Summary: fork() can deadlock on rtld_phdr_lock
           Product: Base System
           Version: 14.1-RELEASE
          Hardware: Any
                OS: Any
            Status: New
          Severity: Affects Only Me
          Priority: ---
         Component: threads
          Assignee: threads@FreeBSD.org
          Reporter: tavianator@tavianator.com

I ran into a deadlock when calling fork() in a multi-threaded app with ASAN
enabled.  The backtrace looks like this:

(gdb) bt
#0  _umtx_op_err () at /usr/src/lib/libthr/arch/amd64/amd64/_umtx_op_err.S:38
#1  0x000000080054af50 in __thr_rwlock_wrlock (rwlock=rwlock@entry=0x80054f640,
tsp=tsp@entry=0x0) at /usr/src/lib/libthr/thread/thr_umtx.c:324
#2  0x0000000800545571 in _thr_rwlock_wrlock (rwlock=<optimized out>,
tsp=<optimized out>) at /usr/src/lib/libthr/thread/thr_umtx.h:239
#3  _thr_rtld_wlock_acquire (lock=0x80054f640) at
/usr/src/lib/libthr/thread/thr_rtld.c:139
#4  0x000000080045ee49 in wlock_acquire (lock=0x80046bae0 <rtld_locks+32>,
lockstate=<optimized out>) at /usr/src/libexec/rtld-elf/rtld_lock.c:275
#5  _rtld_atfork_pre (locks=locks@entry=0x7fffffffb290) at
/usr/src/libexec/rtld-elf/rtld_lock.c:475
#6  0x000000080053e716 in thr_fork_impl (a=0x7fffffffb2f8) at
/usr/src/lib/libthr/thread/thr_fork.c:194
#7  0x000000080053e658 in __thr_fork () at
/usr/src/lib/libthr/thread/thr_fork.c:315
...
(gdb) thread 6
[Switching to thread 6 (LWP 154312 of process 84643)]
(gdb) bt
#0  __syscall () at __syscall.S:4
#1  0x0000000000311995 in __sanitizer::StaticSpinMutex::LockSlow (this=0x4af5b0
<__asan::instance+680>)
    at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_mutex.cpp:24
#2  0x00000000002829b2 in __sanitizer::StaticSpinMutex::Lock (this=0x4af5b0
<__asan::instance+680>) at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h:32
#3 
__sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex>::GenericScopedLock
(mu=0x4af5b0 <__asan::instance+680>, this=<optimized out>)
    at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h:383
#4  __sanitizer::LargeMmapAllocator<__asan::AsanMapUnmapCallback,
__sanitizer::LargeMmapAllocatorPtrArrayDynamic,
__sanitizer::LocalAddressSpaceView>::GetBlockBegin (
    this=0x4af348 <__asan::instance+64>, ptr=0x802e4d120) at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h:184
#5  0x0000000000281666 in
__sanitizer::CombinedAllocator<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView>
>, __sanitizer::LargeMmapAllocatorPtrArrayDynamic>::GetBlockBegin
(p=0x802e4d120, this=<optimized out>) at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h:132
#6  __asan::Allocator::GetAsanChunkByAddr (this=<optimized out>, p=34408288544)
at /usr/src/contrib/llvm-project/compiler-rt/lib/asan/asan_allocator.cpp:824
#7  0x000000000027fbe3 in AllocationBegin (p=0x802e4d120) at
/usr/src/contrib/llvm-project/compiler-rt/lib/asan/asan_allocator.cpp:1215
#8  __sanitizer_get_allocated_begin (p=0x802e4d120) at
/usr/src/contrib/llvm-project/compiler-rt/lib/asan/asan_allocator.cpp:1256
#9  0x0000000000323f3f in __sanitizer::DTLS_on_tls_get_addr
(arg_void=arg_void@entry=0x7fffdf7f89d0, res=res@entry=0x802e4d120,
static_tls_begin=0, static_tls_end=0)
    at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp:138
#10 0x00000000002beb19 in ___interceptor___tls_get_addr (arg=0x7fffdf7f89d0) at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc:5426
#11 0x000000000031146c in __sanitizer::CollectStaticTlsBlocks
(info=0x7fffdf7f8da8, size=<optimized out>, data=0x6)
    at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp:399
#12 0x0000000800458f4d in dl_iterate_phdr (callback=0x311430
<__sanitizer::CollectStaticTlsBlocks(dl_phdr_info*, unsigned long, void*)>,
param=0x7fffdf7f8e38)
    at /usr/src/libexec/rtld-elf/rtld.c:4246
#13 0x00000000003107b2 in __sanitizer::GetStaticTlsBoundary (addr=<optimized
out>, size=<optimized out>, align=<optimized out>)
    at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp:415
#14 __sanitizer::GetTls (addr=addr@entry=0x802e8a030,
size=size@entry=0x7fffdf7f8ef0) at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp:526
#15 0x0000000000310b8b in __sanitizer::GetThreadStackAndTls (main=false,
stk_addr=stk_addr@entry=0x802e8a010, stk_size=0x6,
stk_size@entry=0x7fffdf7f8ef8, 
    tls_addr=0x800701f2a <__syscall+10>, tls_addr@entry=0x802e8a030,
tls_size=0x0, tls_size@entry=0x7fffdf7f8ef0)
    at
/usr/src/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp:610
#16 0x0000000000301ebe in __asan::AsanThread::SetThreadStackAndTls
(this=this@entry=0x802e8a000, options=<optimized out>)
    at /usr/src/contrib/llvm-project/compiler-rt/lib/asan/asan_thread.cpp:306
#17 0x0000000000301b5f in __asan::AsanThread::Init (this=0x802e8a000,
options=options@entry=0x0) at
/usr/src/contrib/llvm-project/compiler-rt/lib/asan/asan_thread.cpp:253
#18 0x0000000000301fb7 in __asan::AsanThread::ThreadStart (this=0x14b,
os_id=154312) at
/usr/src/contrib/llvm-project/compiler-rt/lib/asan/asan_thread.cpp:283
#19 0x00000000002f38b7 in asan_thread_start (arg=0x802e8a000) at
/usr/src/contrib/llvm-project/compiler-rt/lib/asan/asan_interceptors.cpp:230
#20 0x000000080053db05 in thread_start (curthread=0x51b000001c80) at
/usr/src/lib/libthr/thread/thr_create.c:289
#21 0x0000000000000000 in ?? ()

Thread 1 is calling fork() which calls _rtld_atfork_pre() which acquires
rtld_phdr_lock.  Simultaneously, thread 6 is in the middle of dl_iterate_phdr()
with rtld_phdr_lock held outside the loop.  But the callback is apparently
waiting for thread 1 to do something, which will never happen.

It can be reproduced without ASAN too.  Here's a somewhat artificial
reproducer:

$ cat foo.c
#include <link.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Two-party barriers shared by main() and the dl_iterate_phdr() callback:
 *   in_callback - passed once the callback has started running;
 *   done_fork   - passed once main() has got past its fork() call.
 */
pthread_barrier_t in_callback, done_fork;

/*
 * dl_iterate_phdr() callback used to park the iterating thread.
 *
 * The first invocation meets main() at in_callback and then waits at
 * done_fork, which main() only reaches after fork() has returned.  Every
 * later invocation returns immediately.  The info, size and data arguments
 * are not used.  Always returns 0.
 */
int callback(struct dl_phdr_info *info, size_t size, void *data) {
        static int already_blocked = 0;

        if (already_blocked) {
                return 0;
        }

        /* Tell main() we are now inside the callback ... */
        pthread_barrier_wait(&in_callback);
        /* ... and stay here until main() has finished fork(). */
        pthread_barrier_wait(&done_fork);
        already_blocked = 1;
        return 0;
}

/*
 * Thread entry point: walk the loaded objects with dl_iterate_phdr(),
 * using callback() for each one.  The thread argument is ignored and the
 * result of the iteration is discarded; always returns NULL.
 */
void *start_routine(void *arg) {
        (void)arg;
        dl_iterate_phdr(callback, NULL);
        return NULL;
}

/*
 * Reproducer driver.
 *
 * Starts a thread that enters dl_iterate_phdr(), waits until its callback
 * is running, and then calls fork() while that callback is still blocked.
 * If fork() returns, the child exits straight away and the parent releases
 * the callback and joins the thread.
 */
int main(void) {
        pthread_t walker;
        pid_t child;

        /* Each barrier is shared by exactly two parties: main() and callback(). */
        pthread_barrier_init(&in_callback, NULL, 2);
        pthread_barrier_init(&done_fork, NULL, 2);

        /* Start the thread that calls dl_iterate_phdr() */
        pthread_create(&walker, NULL, start_routine, NULL);

        /* Do not go on until the dl_iterate_phdr() callback is running */
        pthread_barrier_wait(&in_callback);

        /* fork() will hang in _rtld_atfork_pre() */
        child = fork();
        if (child != 0) {
                /* Not the child: let the callback finish and reap the thread */
                pthread_barrier_wait(&done_fork);
                pthread_join(walker, NULL);
        }
        return 0;
}
$ cc -pthread foo.c -o foo
$ ./foo

That will hang until you kill the process.

-- 
You are receiving this mail because:
You are the assignee for the bug.