From b0897944cc3081e019b39981790051f7ee127406 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Thu, 20 Mar 2025 20:04:14 +0000 Subject: [PATCH] malloc: Improve performance of __libc_malloc Improve performance of __libc_malloc by splitting it into 2 parts: first handle the tcache fastpath, then do the rest in a separate tail-called function. This results in significant performance gains since __libc_malloc doesn't need to set up a frame and we delay tcache initialization and setting of errno until later. On Neoverse V2, bench-malloc-simple improves by 6.7% overall (up to 8.5% for ST case) and bench-malloc-thread improves by 20.3% for 1 thread and 14.4% for 32 threads. Reviewed-by: DJ Delorie --- malloc/malloc.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/malloc/malloc.c b/malloc/malloc.c index 7e4c139938..a0bc733482 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -1325,6 +1325,9 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ static __always_inline size_t checked_request2size (size_t req) __nonnull (1) { + _Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2, + "PTRDIFF_MAX is not more than half of SIZE_MAX"); + if (__glibc_unlikely (req > PTRDIFF_MAX)) return 0; @@ -3380,26 +3383,17 @@ tcache_thread_shutdown (void) #endif /* !USE_TCACHE */ #if IS_IN (libc) -void * -__libc_malloc (size_t bytes) + +static void * __attribute_noinline__ +__libc_malloc2 (size_t bytes) { mstate ar_ptr; void *victim; - _Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2, - "PTRDIFF_MAX is not more than half of SIZE_MAX"); - if (!__malloc_initialized) ptmalloc_init (); -#if USE_TCACHE - bool err = tcache_try_malloc (bytes, &victim); - if (err) - return NULL; - - if (victim) - return tag_new_usable (victim); -#endif + MAYBE_INIT_TCACHE (); if (SINGLE_THREAD_P) { @@ -3430,6 +3424,19 @@ __libc_malloc (size_t bytes) ar_ptr == arena_for_chunk (mem2chunk (victim))); return victim; } + +void * +__libc_malloc (size_t
bytes) +{ +#if USE_TCACHE + size_t tc_idx = csize2tidx (checked_request2size (bytes)); + + if (tcache_available (tc_idx)) + return tag_new_usable (tcache_get (tc_idx)); +#endif + + return __libc_malloc2 (bytes); +} libc_hidden_def (__libc_malloc) void