git: b764a426534f - main - There is a window where threads are removed from the process list and where the thread destructor is invoked. Catch that window by waiting for all task_struct allocations to be returned before freeing the UMA zone in the LinuxKPI. Else UMA may fail to release the zone due to concurrent access and panic:

Konstantin Belousov kostikbel at gmail.com
Fri May 21 11:51:53 UTC 2021


On Fri, May 21, 2021 at 11:21:17AM +0000, Hans Petter Selasky wrote:
> The branch main has been updated by hselasky:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=b764a426534f2f5f86d6625288c74dafdbc94d2b
> 
> commit b764a426534f2f5f86d6625288c74dafdbc94d2b
> Author:     Hans Petter Selasky <hselasky at FreeBSD.org>
> AuthorDate: 2021-05-21 11:17:42 +0000
> Commit:     Hans Petter Selasky <hselasky at FreeBSD.org>
> CommitDate: 2021-05-21 11:18:41 +0000
> 
>     There is a window where threads are removed from the process list and where
>     the thread destructor is invoked. Catch that window by waiting for all
>     task_struct allocations to be returned before freeing the UMA zone in the
>     LinuxKPI. Else UMA may fail to release the zone due to concurrent access
>     and panic:
>     
>     panic() - Bad link element prev->next != elm
>     zone_release()
>     bucket_drain()
>     bucket_free()
>     zone_dtor()
>     zone_free_item()
>     uma_zdestroy()
>     linux_current_uninit()
>     
>     This failure can be triggered by loading and unloading the LinuxKPI module
>     in a loop:
>     
>     while true
>     do
>     kldload linuxkpi
>     kldunload linuxkpi
>     done
>     
>     Discussed with: kib@
No, it was not discussed, with me.
It contains parts of my half-done patches.
And I disagree with what the global counting you added there, both on
principle and on implementation.

>     MFC after:      1 week
>     Sponsored by:   Mellanox Technologies // NVIDIA Networking
> ---
>  sys/compat/linuxkpi/common/src/linux_current.c | 35 ++++++++++++++++++++++----
>  1 file changed, 30 insertions(+), 5 deletions(-)
> 
> diff --git a/sys/compat/linuxkpi/common/src/linux_current.c b/sys/compat/linuxkpi/common/src/linux_current.c
> index 9bae7ee92e49..51e396081c04 100644
> --- a/sys/compat/linuxkpi/common/src/linux_current.c
> +++ b/sys/compat/linuxkpi/common/src/linux_current.c
> @@ -45,6 +45,7 @@ extern u_int first_msi_irq, num_msi_irqs;
>  
>  static eventhandler_tag linuxkpi_thread_dtor_tag;
>  
> +static atomic_t linux_current_allocs;
>  static uma_zone_t linux_current_zone;
>  static uma_zone_t linux_mm_zone;
>  
> @@ -146,6 +147,10 @@ linux_alloc_current(struct thread *td, int flags)
>  	/* free mm_struct pointer, if any */
>  	uma_zfree(linux_mm_zone, mm);
>  
> +	/* keep track of number of allocations */
> +	if (atomic_add_return(1, &linux_current_allocs) == INT_MAX)
> +		panic("linux_alloc_current: Refcount too high!");
> +
>  	return (0);
>  }
>  
> @@ -173,6 +178,10 @@ linux_free_current(struct task_struct *ts)
>  {
>  	mmput(ts->mm);
>  	uma_zfree(linux_current_zone, ts);
> +
> +	/* keep track of number of allocations */
> +	if (atomic_sub_return(1, &linux_current_allocs) < 0)
> +		panic("linux_free_current: Negative refcount!");
>  }
>  
>  static void
> @@ -271,10 +280,6 @@ SYSCTL_INT(_compat_linuxkpi, OID_AUTO, task_struct_reserve,
>  static void
>  linux_current_init(void *arg __unused)
>  {
> -	lkpi_alloc_current = linux_alloc_current;
> -	linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
> -	    linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
> -
>  	TUNABLE_INT_FETCH("compat.linuxkpi.task_struct_reserve",
>  	    &lkpi_task_resrv);
>  	if (lkpi_task_resrv == 0) {
> @@ -298,6 +303,12 @@ linux_current_init(void *arg __unused)
>  	    UMA_ALIGN_PTR, 0);
>  	uma_zone_reserve(linux_mm_zone, lkpi_task_resrv);
>  	uma_prealloc(linux_mm_zone, lkpi_task_resrv);
> +
> +	atomic_thread_fence_seq_cst();
> +
> +	lkpi_alloc_current = linux_alloc_current;
> +	linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
> +	    linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
>  }
>  SYSINIT(linux_current, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND,
>      linux_current_init, NULL);
> @@ -309,6 +320,10 @@ linux_current_uninit(void *arg __unused)
>  	struct task_struct *ts;
>  	struct thread *td;
>  
> +	lkpi_alloc_current = linux_alloc_current_noop;
> +
> +	atomic_thread_fence_seq_cst();
> +
>  	sx_slock(&allproc_lock);
>  	FOREACH_PROC_IN_SYSTEM(p) {
>  		PROC_LOCK(p);
> @@ -321,8 +336,18 @@ linux_current_uninit(void *arg __unused)
>  		PROC_UNLOCK(p);
>  	}
>  	sx_sunlock(&allproc_lock);
> +
> +	/*
> +	 * There is a window where threads are removed from the
> +	 * process list and where the thread destructor is invoked.
> +	 * Catch that window by waiting for all task_struct
> +	 * allocations to be returned before freeing the UMA zone.
> +	 */
> +	while (atomic_read(&linux_current_allocs) != 0)
> +		pause("W", 1);
> +
>  	EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag);
> -	lkpi_alloc_current = linux_alloc_current_noop;
> +	
>  	uma_zdestroy(linux_current_zone);
>  	uma_zdestroy(linux_mm_zone);
>  }


More information about the dev-commits-src-main mailing list