malloc: Use __always_inline for simple functions

Use __always_inline for small helper functions that are critical for
performance.  This ensures inlining always happens when expected.
Performance of bench-malloc-simple improves by 0.6% on average on
Neoverse V2.

Reviewed-by: DJ Delorie <dj@redhat.com>

Author: Wilco Dijkstra
Date:   2025-03-20 16:36:52 +0000
Commit: 1233da4943 (parent 3263675250)

2 files changed, 11 insertions(+), 11 deletions(-)
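For context: __always_inline comes from glibc's <sys/cdefs.h>, where (on GCC and compatible compilers) it expands to __inline __attribute__ ((__always_inline__)).  A plain static inline is only a hint that the optimizer may ignore, for example at -O0 or when a function grows past the inliner's size heuristics; the attribute instead forces every call to be inlined and makes any call that cannot be inlined a hard error.  A minimal standalone sketch of the difference (the helper names are illustrative, not part of the commit):

#include <sys/cdefs.h>	/* glibc: defines __always_inline as
			   __inline __attribute__ ((__always_inline__)).  */

/* A hint only: the compiler may still emit an out-of-line call,
   e.g. at -O0.  */
static inline int
add_hint (int a, int b)
{
  return a + b;
}

/* Forced: inlined even at -O0; the compiler reports an error for
   any call it cannot inline.  */
static __always_inline int
add_forced (int a, int b)
{
  return a + b;
}

int
sum (int a, int b)
{
  return add_hint (a, b) + add_forced (a, b);
}

Compiling this with gcc -O0 -c and running nm on the object typically shows an out-of-line add_hint symbol but no add_forced at all, since every add_forced call was folded into sum.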

--- a/malloc/arena.c
+++ b/malloc/arena.c

@@ -43,14 +43,14 @@
 /* HEAP_MAX_SIZE should be larger than the huge page size, otherwise heaps will
    use not huge pages.  It is a constant so arena_for_chunk() is efficient.  */
-static inline size_t
+static __always_inline size_t
 heap_min_size (void)
 {
   return mp_.hp_pagesize == 0 || mp_.hp_pagesize > HEAP_MAX_SIZE
	  ? HEAP_MIN_SIZE : mp_.hp_pagesize;
 }
 
-static inline size_t
+static __always_inline size_t
 heap_max_size (void)
 {
   return HEAP_MAX_SIZE;
 }
@@ -141,14 +141,14 @@ static bool __malloc_initialized = false;
 /* find the heap and corresponding arena for a given ptr */
 
-static inline heap_info *
+static __always_inline heap_info *
 heap_for_ptr (void *ptr)
 {
   size_t max_size = heap_max_size ();
   return PTR_ALIGN_DOWN (ptr, max_size);
 }
 
-static inline struct malloc_state *
+static __always_inline struct malloc_state *
 arena_for_chunk (mchunkptr ptr)
 {
   return chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr;
 }
@@ -232,8 +232,8 @@ __malloc_fork_unlock_child (void)
 }
 
 #define TUNABLE_CALLBACK_FNDECL(__name, __type) \
-static inline int do_ ## __name (__type value); \
-static void \
+static __always_inline int do_ ## __name (__type value); \
+static void \
 TUNABLE_CALLBACK (__name) (tunable_val_t *valp) \
 { \
   __type value = (__type) (valp)->numval; \

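The TUNABLE_CALLBACK_FNDECL hunk above extends the same idea to the do_* workers generated for each malloc tunable.  A self-contained sketch of roughly what one expansion produces, using set_mmap_threshold as the example name; the tunable_val_t and TUNABLE_CALLBACK stand-ins are simplified from glibc's elf/dl-tunables.h, and the worker body is a stub:

#include <stddef.h>
#include <sys/cdefs.h>

/* Simplified stand-ins for the real definitions in elf/dl-tunables.h.  */
typedef struct { unsigned long numval; } tunable_val_t;
#define TUNABLE_CALLBACK(__name) _dl_tunable_ ## __name

/* Stub worker; in glibc this would update malloc's parameters.  */
static __always_inline int
do_set_mmap_threshold (size_t value)
{
  return value != 0;
}

/* Roughly what TUNABLE_CALLBACK_FNDECL (set_mmap_threshold, size_t)
   generates: a thin wrapper that unpacks the tunable value and calls
   the worker.  */
static void
TUNABLE_CALLBACK (set_mmap_threshold) (tunable_val_t *valp)
{
  size_t value = (size_t) (valp)->numval;
  do_set_mmap_threshold (value);
}

With the worker declared __always_inline, each generated wrapper compiles down to a single function instead of a wrapper plus an out-of-line do_* call.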
--- a/malloc/malloc.c
+++ b/malloc/malloc.c

@@ -1322,7 +1322,7 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    value is less than PTRDIFF_T.  Returns the requested size or
    MINSIZE in case the value is less than MINSIZE, or 0 if any of the
    previous checks fail.  */
-static inline size_t
+static __always_inline size_t
 checked_request2size (size_t req) __nonnull (1)
 {
   if (__glibc_unlikely (req > PTRDIFF_MAX))
@@ -1782,7 +1782,7 @@ static uint8_t global_max_fast;
   global_max_fast = (((size_t) (s) <= MALLOC_ALIGN_MASK - SIZE_SZ) \
		      ? MIN_CHUNK_SIZE / 2 : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
 
-static inline INTERNAL_SIZE_T
+static __always_inline INTERNAL_SIZE_T
 get_max_fast (void)
 {
   /* Tell the GCC optimizers that global_max_fast is never larger
@@ -3245,7 +3245,7 @@ tcache_double_free_verify (tcache_entry *e, size_t tc_idx)
 
 /* Try to free chunk to the tcache, if success return true.
    Caller must ensure that chunk and size are valid.  */
-static inline bool
+static __always_inline bool
 tcache_free (mchunkptr p, INTERNAL_SIZE_T size)
 {
   bool done = false;
@@ -4553,7 +4553,7 @@ _int_malloc (mstate av, size_t bytes)
    ------------------------------ free ------------------------------
  */
 
-static inline void
+static __always_inline void
 _int_free_check (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
 {
   /* Little security check which won't hurt performance: the
@@ -4687,7 +4687,7 @@ _int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock)
    P has already been locked.  It will perform sanity check, then try the
    fast path to free into tcache.  If the attempt not success, free the
    chunk to arena.  */
-static inline void
+static __always_inline void
 _int_free (mstate av, mchunkptr p, int have_lock)
 {
   INTERNAL_SIZE_T size;        /* its size */