From 1233da4943a9a0e334748490225ae10b0d770c0a Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra
Date: Thu, 20 Mar 2025 16:36:52 +0000
Subject: [PATCH] malloc: Use __always_inline for simple functions

Use __always_inline for small helper functions that are critical for
performance.  This ensures inlining always happens when expected.
Performance of bench-malloc-simple improves by 0.6% on average on
Neoverse V2.

Reviewed-by: DJ Delorie
---
 malloc/arena.c  | 12 ++++++------
 malloc/malloc.c | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/malloc/arena.c b/malloc/arena.c
index 405ae829c0..5672c699aa 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -43,14 +43,14 @@
 
 /* HEAP_MAX_SIZE should be larger than the huge page size, otherwise heaps
    will not use huge pages.  It is a constant so arena_for_chunk() is efficient.  */
-static inline size_t
+static __always_inline size_t
 heap_min_size (void)
 {
   return mp_.hp_pagesize == 0 || mp_.hp_pagesize > HEAP_MAX_SIZE
          ? HEAP_MIN_SIZE : mp_.hp_pagesize;
 }
 
-static inline size_t
+static __always_inline size_t
 heap_max_size (void)
 {
   return HEAP_MAX_SIZE;
@@ -141,14 +141,14 @@ static bool __malloc_initialized = false;
 
 /* find the heap and corresponding arena for a given ptr */
 
-static inline heap_info *
+static __always_inline heap_info *
 heap_for_ptr (void *ptr)
 {
   size_t max_size = heap_max_size ();
   return PTR_ALIGN_DOWN (ptr, max_size);
 }
 
-static inline struct malloc_state *
+static __always_inline struct malloc_state *
 arena_for_chunk (mchunkptr ptr)
 {
   return chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr;
@@ -232,8 +232,8 @@ __malloc_fork_unlock_child (void)
 }
 
 #define TUNABLE_CALLBACK_FNDECL(__name, __type)                 \
-static inline int do_ ## __name (__type value);                \
-static void                                                    \
+static __always_inline int do_ ## __name (__type value);       \
+static void                                                    \
 TUNABLE_CALLBACK (__name) (tunable_val_t *valp)                \
 {                                                              \
   __type value = (__type) (valp)->numval;                      \
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 0811061e46..7e4c139938 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1322,7 +1322,7 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    value is less than PTRDIFF_T.  Returns the requested size or MINSIZE in
    case the value is less than MINSIZE, or 0 if any of the previous checks
    fail.  */
-static inline size_t
+static __always_inline size_t
 checked_request2size (size_t req) __nonnull (1)
 {
   if (__glibc_unlikely (req > PTRDIFF_MAX))
@@ -1782,7 +1782,7 @@ static uint8_t global_max_fast;
   global_max_fast = (((size_t) (s) <= MALLOC_ALIGN_MASK - SIZE_SZ)     \
                      ? MIN_CHUNK_SIZE / 2 : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
 
-static inline INTERNAL_SIZE_T
+static __always_inline INTERNAL_SIZE_T
 get_max_fast (void)
 {
   /* Tell the GCC optimizers that global_max_fast is never larger
@@ -3245,7 +3245,7 @@ tcache_double_free_verify (tcache_entry *e, size_t tc_idx)
 
 /* Try to free chunk to the tcache, if successful return true.
    Caller must ensure that chunk and size are valid.  */
-static inline bool
+static __always_inline bool
 tcache_free (mchunkptr p, INTERNAL_SIZE_T size)
 {
   bool done = false;
@@ -4553,7 +4553,7 @@ _int_malloc (mstate av, size_t bytes)
    ------------------------------ free ------------------------------
  */
 
-static inline void
+static __always_inline void
 _int_free_check (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
 {
   /* Little security check which won't hurt performance: the
@@ -4687,7 +4687,7 @@ _int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock)
    P has already been locked.  It will perform a sanity check, then try the
    fast path of freeing into the tcache.  If the attempt does not succeed,
    free the chunk to the arena.  */
-static inline void
+static __always_inline void
 _int_free (mstate av, mchunkptr p, int have_lock)
 {
   INTERNAL_SIZE_T size;        /* its size */
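
For context: __always_inline is glibc's wrapper for the GCC/Clang
always_inline function attribute; <sys/cdefs.h> defines it as
__inline __attribute__ ((__always_inline__)).  Plain 'static inline' is
only a hint that the inliner may ignore, notably at -O0 or when its size
heuristics decline, whereas the attribute forces the call to be inlined.
Below is a minimal standalone sketch of the pattern, not part of the
patch; the fallback #define and the HEAP_MAX_SIZE value are assumptions
made so that it builds outside glibc:

#include <stddef.h>

/* Assumption: outside glibc, provide a fallback matching the definition
   in glibc's <sys/cdefs.h>.  */
#ifndef __always_inline
# define __always_inline __inline __attribute__ ((__always_inline__))
#endif

/* Hypothetical constant standing in for glibc's HEAP_MAX_SIZE.  */
#define HEAP_MAX_SIZE (64 * 1024 * 1024)

/* With plain 'static inline' the compiler may still emit a call at -O0;
   always_inline forces the body to be substituted (GCC reports an error
   if it cannot), which is the guarantee the patch relies on for these
   hot helpers.  */
static __always_inline size_t
heap_max_size (void)
{
  return HEAP_MAX_SIZE;
}

int
main (void)
{
  /* Inlined even at -O0: no call to heap_max_size should be emitted.  */
  return heap_max_size () == HEAP_MAX_SIZE ? 0 : 1;
}

Compiling this with 'gcc -O0 -S' and inspecting the assembly should show
no call to heap_max_size, whereas dropping the attribute leaves an
out-of-line call at -O0.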