nptl: Do not use pthread set_tid_address as state synchronization (BZ #19951)

The use-after-free described in BZ#19951 is due to the use of two
different PD fields, 'joinid' and 'cancelhandling', to describe the
thread state and to synchronise the calls of pthread_join,
pthread_detach, pthread_exit, and normal thread exit.

Any state change may require checking both fields atomically to handle
partial state (e.g., pthread_join() with a cancellation handler to
issue a 'joinstate' field rollback).

This patch uses a different PD member with 4 possible states (JOINABLE,
DETACHED, EXITING, and EXITED) instead of the pthread 'tid' field, with
the following logic:

 1. On pthread_create, the initial state is set either to JOINABLE or
    DETACHED depending on the pthread attribute used.

 2. On pthread_detach, a CAS is issued on the state.  If the CAS fails,
    the thread is already detached (DETACHED) or being terminated (EXITING).
    For the former, an EINVAL is returned; for the latter, pthread_detach
    should be responsible for joining the thread (and for deallocating any
    internal resources).

 3. In the exit phase of the wrapper function for the thread start routine
    (reached either if the thread function has returned, pthread_exit has
    been called, or cancellation handled has been acted upon), we issue a
    CAS on state to set it to the EXITING mode.

    If the thread is previously in DETACHED mode, the thread is responsible
    for deallocating any resources; otherwise, the thread must be joined
    (detached threads cannot deallocate themselves immediately).

 4. The clear_tid_field on 'clone' call is changed to set the new 'state'
    field on thread exit (EXITED).  This state is only reached at thread
    termination.

 5. The pthread_join implementation is now simpler: the futex wait is done
    directly on thread state, and there is no need to reset it in case of
    timeout since the state is now set either by pthread_detach() or by the
    kernel on process termination.

The race condition on pthread_detach is avoided with a single atomic
operation on the PD state: once the mode is set to THREAD_STATE_DETACHED, it
is up to the thread itself to deallocate its memory (done during the exit
phase at pthread_create()).

Also, the INVALID_NOT_TERMINATED_TD_P is removed since a negative yid is
not possible, and the macro is not used anywhere.

This change triggers an invalid C11 thread test: it creates a thread that
detaches, and after a timeout, the creating thread checks whether the join
fails.  The issue is that once thrd_join() is called, the thread's lifetime
is not defined.

Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu,
arm-linux-gnueabihf, and powerpc64-linux-gnu.

Reviewed-by: Florian Weimer <fweimer@redhat.com>
This commit is contained in:
Adhemerval Zanella 2025-12-11 17:47:19 -03:00
parent 2d865eaa12
commit 5da15b15ad
12 changed files with 134 additions and 139 deletions

View File

@ -132,6 +132,18 @@ enum allocate_stack_mode_t
ALLOCATE_GUARD_USER = 2,
};
/* Possible values for the 'joinstate' field. The field will be cleared
(set to THREAD_STATE_EXITED) atomically by the kernel when thread
terminated. */
enum
{
THREAD_STATE_EXITED = 0,
THREAD_STATE_EXITING,
THREAD_STATE_JOINABLE,
THREAD_STATE_DETACHED,
};
/* Thread descriptor data structure. */
struct pthread
{
@ -174,8 +186,7 @@ struct pthread
GL (dl_stack_user) list. */
list_t list;
/* Thread ID - which is also a 'is this thread descriptor (and
therefore stack) used' flag. */
/* Thread ID set by the kernel with CLONE_PARENT_SETTID. */
pid_t tid;
/* List of robust mutexes the thread is holding. */
@ -345,15 +356,8 @@ struct pthread
/* Lock for synchronizing setxid calls. */
unsigned int setxid_futex;
/* If the thread waits to join another one the ID of the latter is
stored here.
In case a thread is detached this field contains a pointer of the
TCB if the thread itself. This is something which cannot happen
in normal operation. */
struct pthread *joinid;
/* Check whether a thread is detached. */
#define IS_DETACHED(pd) ((pd)->joinid == (pd))
/* The current thread state defined by the THREAD_STATE_* enumeration. */
unsigned int joinstate;
/* The result of the thread function. */
void *result;

View File

@ -34,7 +34,7 @@ extern int32_t __nptl_stack_hugetlb;
static inline bool
__nptl_stack_in_use (struct pthread *pd)
{
return pd->tid <= 0;
return atomic_load_relaxed (&pd->joinstate) == THREAD_STATE_EXITED;
}
/* Remove the stack ELEM from its list. */

View File

@ -60,7 +60,8 @@ __pthread_cancel (pthread_t th)
{
volatile struct pthread *pd = (volatile struct pthread *) th;
if (pd->tid == 0)
unsigned int state = atomic_load_relaxed (&pd->joinstate);
if (state == THREAD_STATE_EXITED)
/* The thread has already exited on the kernel side. Its outcome
(regular exit, other cancelation) has already been
determined. */

View File

@ -290,7 +290,7 @@ static int create_thread (struct pthread *pd, const struct pthread_attr *attr,
.flags = clone_flags,
.pidfd = (uintptr_t) &pd->tid,
.parent_tid = (uintptr_t) &pd->tid,
.child_tid = (uintptr_t) &pd->tid,
.child_tid = (uintptr_t) &pd->joinstate,
.stack = (uintptr_t) stackaddr,
.stack_size = stacksize,
.tls = (uintptr_t) tp,
@ -355,12 +355,14 @@ start_thread (void *arg)
and free any resource prior return to the pthread_create caller. */
setup_failed = pd->setup_failed == 1;
if (setup_failed)
pd->joinid = NULL;
pd->joinstate = THREAD_STATE_JOINABLE;
/* And give it up right away. */
lll_unlock (pd->lock, LLL_PRIVATE);
if (setup_failed)
/* No need to clear the tid here, pthread_create() will join the
thread prior returning to caller. */
goto out;
}
@ -496,6 +498,19 @@ start_thread (void *arg)
the breakpoint reports TD_THR_RUN state rather than TD_THR_ZOMBIE. */
atomic_fetch_or_relaxed (&pd->cancelhandling, EXITING_BITMASK);
/* CONCURRENCY NOTES:
Concurrent pthread_detach() either sets the state to
THREAD_STATE_DETACHED or waits for the thread to terminate. The
THREAD_STATE_EXITING state set here ensures that pthread_join() waits
until all required cleanup steps are complete.
The 'prevstate' variable field will be used to determine who is
responsible for calling __nptl_free_tcb below. */
unsigned int prevstate = atomic_exchange_relaxed (&pd->joinstate,
THREAD_STATE_EXITING);
if (__glibc_unlikely (atomic_fetch_add_relaxed (&__nptl_nthreads, -1) == 1))
/* This was the last thread. */
exit (0);
@ -578,20 +593,21 @@ start_thread (void *arg)
pd->setxid_futex = 0;
}
/* If the thread is detached free the TCB. */
if (IS_DETACHED (pd))
if (prevstate == THREAD_STATE_DETACHED)
/* Free the TCB. */
__nptl_free_tcb (pd);
/* Remove the associated name from the thread stack. */
name_stack_maps (pd, false);
pd->tid = 0;
out:
/* We cannot call '_exit' here. '_exit' will terminate the process.
The 'exit' implementation in the kernel will signal when the
process is really dead since 'clone' got passed the CLONE_CHILD_CLEARTID
flag. The 'tid' field in the TCB will be set to zero.
flag. The 'joinstate' field in the TCB will be set to zero.
rseq TLS is still registered at this point. Rely on implicit
unregistration performed by the kernel on thread teardown. This is not a
@ -706,7 +722,9 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
/* Initialize the field for the ID of the thread which is waiting
for us. This is a self-reference in case the thread is created
detached. */
pd->joinid = iattr->flags & ATTR_FLAG_DETACHSTATE ? pd : NULL;
pd->joinstate = iattr->flags & ATTR_FLAG_DETACHSTATE
? THREAD_STATE_DETACHED
: THREAD_STATE_JOINABLE;
/* The debug events are inherited from the parent. */
pd->eventbuf = self->eventbuf;
@ -865,9 +883,10 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
/* Similar to pthread_join, but since thread creation has failed at
startup there is no need to handle all the steps. */
pid_t tid;
while ((tid = atomic_load_acquire (&pd->tid)) != 0)
__futex_abstimed_wait64 ((unsigned int *) &pd->tid, tid, 0, NULL,
unsigned int state;
while ((state = atomic_load_relaxed (&pd->joinstate))
!= THREAD_STATE_EXITED)
__futex_abstimed_wait64 (&pd->joinstate, state, 0, NULL,
LLL_SHARED);
}

View File

@ -25,32 +25,32 @@ ___pthread_detach (pthread_t th)
{
struct pthread *pd = (struct pthread *) th;
/* Make sure the descriptor is valid. */
if (INVALID_NOT_TERMINATED_TD_P (pd))
/* Not a valid thread handle. */
return ESRCH;
/* CONCURRENCY NOTES:
int result = 0;
Concurrent pthread_detach will return EINVAL for the case where the
thread is already detached (THREAD_STATE_DETACHED). POSIX states it is
undefined to call pthread_detach if TH refers to a non-joinable thread.
/* Mark the thread as detached. */
if (atomic_compare_and_exchange_bool_acq (&pd->joinid, pd, NULL))
In the case the thread is being terminated (THREAD_STATE_EXITING),
pthread_detach will be responsible for cleaning up the stack. */
unsigned int prevstate = atomic_load_relaxed (&pd->joinstate);
do
{
/* There are two possibilities here. First, the thread might
already be detached. In this case we return EINVAL.
Otherwise there might already be a waiter. The standard does
not mention what happens in this case. */
if (IS_DETACHED (pd))
result = EINVAL;
}
else
/* Check whether the thread terminated meanwhile. In this case we
will just free the TCB. */
if ((pd->cancelhandling & EXITING_BITMASK) != 0)
/* Note that the code in __free_tcb makes sure each thread
control block is freed only once. */
__nptl_free_tcb (pd);
if (prevstate != THREAD_STATE_JOINABLE)
{
if (prevstate == THREAD_STATE_DETACHED)
return EINVAL;
/* pthread_detach is declared _THROW so it need to call a
pthread_join variant that is not a cancellation entrypoint. */
return result;
return __pthread_clockjoin_ex (th, 0, 0 /* Ignored */, NULL,
false);
}
}
while (!atomic_compare_exchange_weak_acquire (&pd->joinstate, &prevstate,
THREAD_STATE_DETACHED));
return 0;
}
versioned_symbol (libc, ___pthread_detach, pthread_detach, GLIBC_2_34);
libc_hidden_ver (___pthread_detach, __pthread_detach)

View File

@ -52,7 +52,7 @@ __pthread_getattr_np (pthread_t thread_id, pthread_attr_t *attr)
iattr->flags = thread->flags;
/* The thread might be detached by now. */
if (IS_DETACHED (thread))
if (atomic_load_relaxed (&thread->joinstate) == THREAD_STATE_DETACHED)
iattr->flags |= ATTR_FLAG_DETACHSTATE;
/* This is the guardsize after adjusting it. */

View File

@ -22,116 +22,65 @@
#include <time.h>
#include <futex-internal.h>
static void
cleanup (void *arg)
{
/* If we already changed the waiter ID, reset it. The call cannot
fail for any reason but the thread not having done that yet so
there is no reason for a loop. */
void *self = THREAD_SELF;
atomic_compare_exchange_weak_acquire (&arg, &self, NULL);
}
int
__pthread_clockjoin_ex (pthread_t threadid, void **thread_return,
clockid_t clockid,
const struct __timespec64 *abstime, bool block)
const struct __timespec64 *abstime,
bool cancel)
{
struct pthread *pd = (struct pthread *) threadid;
/* Make sure the descriptor is valid. */
if (INVALID_NOT_TERMINATED_TD_P (pd))
/* Not a valid thread handle. */
return ESRCH;
/* Is the thread joinable?. */
if (IS_DETACHED (pd))
/* We cannot wait for the thread. */
return EINVAL;
/* Make sure the clock and time specified are valid. */
if (abstime
&& __glibc_unlikely (!futex_abstimed_supported_clockid (clockid)
|| ! valid_nanoseconds (abstime->tv_nsec)))
return EINVAL;
struct pthread *self = THREAD_SELF;
int result = 0;
LIBC_PROBE (pthread_join, 1, threadid);
if ((pd == self
|| (self->joinid == pd
&& (pd->cancelhandling
& (CANCELING_BITMASK | CANCELED_BITMASK | EXITING_BITMASK
| TERMINATED_BITMASK)) == 0))
&& !cancel_enabled_and_canceled (self->cancelhandling))
/* This is a deadlock situation. The threads are waiting for each
other to finish. Note that this is a "may" error. To be 100%
sure we catch this error we would have to lock the data
structures but it is not necessary. In the unlikely case that
two threads are really caught in this situation they will
deadlock. It is the programmer's problem to figure this
out. */
return EDEADLK;
/* Wait for the thread to finish. If it is already locked something
is wrong. There can only be one waiter. */
else if (__glibc_unlikely (atomic_compare_exchange_weak_acquire (&pd->joinid,
&self,
NULL)))
/* There is already somebody waiting for the thread. */
return EINVAL;
/* BLOCK waits either indefinitely or based on an absolute time. POSIX also
states a cancellation point shall occur for pthread_join, and we use the
same rationale for posix_timedjoin_np. Both clockwait_tid and the futex
call use the cancellable variant. */
if (block)
int result = 0;
unsigned int state;
while ((state = atomic_load_acquire (&pd->joinstate))
!= THREAD_STATE_EXITED)
{
/* During the wait we change to asynchronous cancellation. If we
are cancelled the thread we are waiting for must be marked as
un-wait-ed for again. */
pthread_cleanup_push (cleanup, &pd->joinid);
struct pthread *self = THREAD_SELF;
if (pd == self
&& !cancel_enabled_and_canceled (self->cancelhandling))
return EDEADLK;
/* We need acquire MO here so that we synchronize with the
kernel's store to 0 when the clone terminates. (see above) */
pid_t tid;
while ((tid = atomic_load_acquire (&pd->tid)) != 0)
{
/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via
futex wake-up when the clone terminates. The memory location
contains the thread ID while the clone is running and is reset to
zero by the kernel afterwards. The kernel up to version 3.16.3
does not use the private futex operations for futex wake-up when
the clone terminates. */
int ret = __futex_abstimed_wait_cancelable64 (
(unsigned int *) &pd->tid, tid, clockid, abstime, LLL_SHARED);
if (ret == ETIMEDOUT || ret == EOVERFLOW)
{
result = ret;
break;
}
/* POSIX states calling pthread_join on a non joinable thread is
undefined. However, if PD is still in the cache we can warn
the caller. */
if (state == THREAD_STATE_DETACHED)
return EINVAL;
/* pthread_join is a cancellation entrypoint and we use the same
rationale for pthread_timedjoin_np.
The kernel notifies a process which uses CLONE_CHILD_CLEARTID via
a memory zeroing and futex wake-up when the process terminates.
The futex operation is not private. */
int ret = cancel
? __futex_abstimed_wait_cancelable64 (&pd->joinstate, state, clockid,
abstime, LLL_SHARED)
: __futex_abstimed_wait64 (&pd->joinstate, state, clockid, abstime,
LLL_SHARED);
if (ret == ETIMEDOUT || ret == EOVERFLOW)
{
result = ret;
break;
}
pthread_cleanup_pop (0);
}
void *pd_result = pd->result;
if (__glibc_likely (result == 0))
{
/* We mark the thread as terminated and as joined. */
pd->tid = -1;
/* Store the return value if the caller is interested. */
if (thread_return != NULL)
*thread_return = pd_result;
/* Free the TCB. */
__nptl_free_tcb (pd);
}
else
pd->joinid = NULL;
LIBC_PROBE (pthread_join_ret, 3, threadid, result, pd_result);

View File

@ -21,15 +21,24 @@
int
__pthread_tryjoin_np (pthread_t threadid, void **thread_return)
{
/* Return right away if the thread hasn't terminated yet. */
struct pthread *pd = (struct pthread *) threadid;
if (pd->tid != 0)
return EBUSY;
/* The joinable state (THREAD_STATE_JOINABLE) is straightforward: the thread
hasn't finished yet, so trying to join might block.
/* If pd->tid == 0 then lll_wait_tid will not block on futex
operation. */
return __pthread_clockjoin_ex (threadid, thread_return, 0 /* Ignored */,
NULL, false);
The exiting thread (THREAD_STATE_EXITING) also might result in a blocking
call: a detached thread might change its state to exiting, and an exiting
thread might take some time to exit (and thus let the kernel set the
state to THREAD_STATE_EXITED).
The joinstate does not change during the thread lifetime once the
kernel sets it to THREAD_STATE_EXITED. The __pthread_clockjoin_ex will
only call the cancellable futex if state is not THREAD_STATE_EXITED, so
calling it should be safe wrt not making pthread_tryjoin_np a
cancellable entrypoint (since it is marked as __THROW). */
struct pthread *pd = (struct pthread *) threadid;
return atomic_load_acquire (&pd->joinstate) != THREAD_STATE_EXITED
? EBUSY
: __pthread_clockjoin_ex (threadid, thread_return, 0, NULL, false);
}
versioned_symbol (libc, __pthread_tryjoin_np, pthread_tryjoin_np, GLIBC_2_34);

View File

@ -73,9 +73,10 @@ __tls_init_tp (void)
list_add (&pd->list, &GL (dl_stack_user));
/* Early initialization of the TCB. */
pd->tid = INTERNAL_SYSCALL_CALL (set_tid_address, &pd->tid);
pd->tid = INTERNAL_SYSCALL_CALL (set_tid_address, &pd->joinstate);
THREAD_SETMEM (pd, specific[0], &pd->specific_1stblock[0]);
THREAD_SETMEM (pd, stack_mode, ALLOCATE_GUARD_USER);
THREAD_SETMEM (pd, joinstate, THREAD_STATE_JOINABLE);
/* Before initializing GL (dl_stack_user), the debugger could not
find us and had to set __nptl_initial_report_events. Propagate

View File

@ -18,6 +18,7 @@
#include <atomic.h>
#include <pthreadP.h>
#include <futex-internal.h>
_Noreturn static void
__libc_start_call_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
@ -65,6 +66,14 @@ __libc_start_call_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
/* One less thread. Decrement the counter. If it is zero we
terminate the entire process. */
result = 0;
/* For the case a thread is waiting for the main thread to finish. */
internal_sigset_t set;
internal_signal_block_all (&set);
struct pthread *self = THREAD_SELF;
atomic_store_release (&self->joinstate, THREAD_STATE_EXITED);
futex_wake (&self->joinstate, 1, FUTEX_SHARED);
if (atomic_fetch_add_relaxed (&__nptl_nthreads, -1) != 1)
/* Not much left to do but to exit the thread, not the process. */
while (1)

View File

@ -217,7 +217,6 @@ libc_hidden_proto (__pthread_current_priority)
nothing. And if the test triggers the thread descriptor is
guaranteed to be invalid. */
#define INVALID_TD_P(pd) __builtin_expect ((pd)->tid <= 0, 0)
#define INVALID_NOT_TERMINATED_TD_P(pd) __builtin_expect ((pd)->tid < 0, 0)
extern void __pthread_unwind (__pthread_unwind_buf_t *__buf)
__cleanup_fct_attribute __attribute ((__noreturn__))

View File

@ -28,7 +28,10 @@ detach_thrd (void *arg)
{
if (thrd_detach (thrd_current ()) != thrd_success)
FAIL_EXIT1 ("thrd_detach failed");
thrd_exit (thrd_success);
pause ();
return 0;
}
static int
@ -43,8 +46,9 @@ do_test (void)
/* Give some time so the thread can finish. */
thrd_sleep (&(struct timespec) {.tv_sec = 2}, NULL);
/* Calling thrd_join on a detached thread is UB... */
if (thrd_join (id, NULL) == thrd_success)
FAIL_EXIT1 ("thrd_join succeed where it should fail");
FAIL_EXIT1 ("thrd_join succeeded where it should fail");
return 0;
}