malloc: Use __always_inline for simple functions

Use __always_inline for small helper functions that are critical for
performance.  This ensures inlining always happens when expected.
Performance of bench-malloc-simple improves by 0.6% on average on
Neoverse V2.

Reviewed-by: DJ Delorie <dj@redhat.com>
Author: Wilco Dijkstra
Date:   2025-03-20 16:36:52 +00:00
Commit: 1233da4943
Parent: 3263675250

2 changed files with 11 additions and 11 deletions
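
For context, a minimal sketch of the mechanism the patch relies on: glibc's
__always_inline macro (defined in <sys/cdefs.h>) expands to the GCC
always_inline function attribute, which makes the compiler inline the function
even when its size heuristics would otherwise decline.  The sketch below is
illustrative only, not glibc source; the names my_always_inline,
helper_max_size and use_helper are made up for the example.

/* Stand-in for glibc's __always_inline from <sys/cdefs.h>.  */
#include <stddef.h>

#define my_always_inline inline __attribute__ ((__always_inline__))

/* A tiny, hot helper in the style of heap_max_size ().  The attribute
   guarantees it is inlined, so the fast path pays no call overhead.  */
static my_always_inline size_t
helper_max_size (void)
{
  return 64 * 1024 * 1024;   /* placeholder constant for the example */
}

size_t
use_helper (void)
{
  /* helper_max_size () is inlined here; with GCC the attribute forces
     inlining even in cases where a plain 'inline' hint would be ignored.  */
  return helper_max_size ();
}

Building with gcc -O2 -S and inspecting the output should show no call to
helper_max_size; its body is substituted directly into use_helper.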

malloc/arena.c

@@ -43,14 +43,14 @@
 /* HEAP_MAX_SIZE should be larger than the huge page size, otherwise heaps will
    use not huge pages.  It is a constant so arena_for_chunk() is efficient.  */
-static inline size_t
+static __always_inline size_t
 heap_min_size (void)
 {
   return mp_.hp_pagesize == 0 || mp_.hp_pagesize > HEAP_MAX_SIZE
          ? HEAP_MIN_SIZE : mp_.hp_pagesize;
 }
 
-static inline size_t
+static __always_inline size_t
 heap_max_size (void)
 {
   return HEAP_MAX_SIZE;
@@ -141,14 +141,14 @@ static bool __malloc_initialized = false;
 /* find the heap and corresponding arena for a given ptr */
-static inline heap_info *
+static __always_inline heap_info *
 heap_for_ptr (void *ptr)
 {
   size_t max_size = heap_max_size ();
   return PTR_ALIGN_DOWN (ptr, max_size);
 }
 
-static inline struct malloc_state *
+static __always_inline struct malloc_state *
 arena_for_chunk (mchunkptr ptr)
 {
   return chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr;
@@ -232,8 +232,8 @@ __malloc_fork_unlock_child (void)
 }
 
 #define TUNABLE_CALLBACK_FNDECL(__name, __type) \
-static inline int do_ ## __name (__type value); \
+static __always_inline int do_ ## __name (__type value); \
 static void \
 TUNABLE_CALLBACK (__name) (tunable_val_t *valp) \
 { \
   __type value = (__type) (valp)->numval; \

malloc/malloc.c

@@ -1322,7 +1322,7 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    value is less than PTRDIFF_T.  Returns the requested size or
    MINSIZE in case the value is less than MINSIZE, or 0 if any of the
    previous checks fail.  */
-static inline size_t
+static __always_inline size_t
 checked_request2size (size_t req) __nonnull (1)
 {
   if (__glibc_unlikely (req > PTRDIFF_MAX))
@@ -1782,7 +1782,7 @@ static uint8_t global_max_fast;
   global_max_fast = (((size_t) (s) <= MALLOC_ALIGN_MASK - SIZE_SZ) \
                      ? MIN_CHUNK_SIZE / 2 : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
 
-static inline INTERNAL_SIZE_T
+static __always_inline INTERNAL_SIZE_T
 get_max_fast (void)
 {
   /* Tell the GCC optimizers that global_max_fast is never larger
@@ -3245,7 +3245,7 @@ tcache_double_free_verify (tcache_entry *e, size_t tc_idx)
 /* Try to free chunk to the tcache, if success return true.
    Caller must ensure that chunk and size are valid.  */
-static inline bool
+static __always_inline bool
 tcache_free (mchunkptr p, INTERNAL_SIZE_T size)
 {
   bool done = false;
@@ -4553,7 +4553,7 @@ _int_malloc (mstate av, size_t bytes)
    ------------------------------ free ------------------------------
 */
 
-static inline void
+static __always_inline void
 _int_free_check (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
 {
   /* Little security check which won't hurt performance: the
@@ -4687,7 +4687,7 @@ _int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock)
    P has already been locked.  It will perform sanity check, then try the
    fast path to free into tcache.  If the attempt not success, free the
    chunk to arena.  */
-static inline void
+static __always_inline void
 _int_free (mstate av, mchunkptr p, int have_lock)
 {
   INTERNAL_SIZE_T size;        /* its size */