2019-05-31 08:09:38 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2012-04-19 21:59:55 +00:00
|
|
|
/*
|
2019-12-04 00:46:31 +00:00
|
|
|
* sorttable.c: Sort the kernel's table
|
2012-04-19 21:59:55 +00:00
|
|
|
*
|
2019-12-04 00:46:32 +00:00
|
|
|
* Added ORC unwind tables sort support and other updates:
|
|
|
|
* Copyright (C) 1999-2019 Alibaba Group Holding Limited. by:
|
|
|
|
* Shile Zhang <shile.zhang@linux.alibaba.com>
|
|
|
|
*
|
2012-04-24 18:23:14 +00:00
|
|
|
* Copyright 2011 - 2012 Cavium, Inc.
|
2012-04-19 21:59:55 +00:00
|
|
|
*
|
|
|
|
* Based on code taken from recortmcount.c which is:
|
|
|
|
*
|
|
|
|
* Copyright 2009 John F. Reiser <jreiser@BitWagon.com>. All rights reserved.
|
|
|
|
*
|
|
|
|
* Restructured to fit Linux format, as well as other updates:
|
2019-12-04 00:46:32 +00:00
|
|
|
* Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
|
2012-04-19 21:59:55 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Strategy: alter the vmlinux file in-place.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <getopt.h>
|
|
|
|
#include <elf.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2025-02-18 19:59:19 +00:00
|
|
|
#include <stdbool.h>
|
2012-04-19 21:59:55 +00:00
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
2021-12-12 11:33:58 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <pthread.h>
|
2012-04-19 21:59:55 +00:00
|
|
|
|
2012-04-24 18:23:14 +00:00
|
|
|
#include <tools/be_byteshift.h>
|
|
|
|
#include <tools/le_byteshift.h>
|
|
|
|
|
2013-11-15 06:38:05 +00:00
|
|
|
/*
 * Fallback e_machine values for architectures that may be missing from
 * older host <elf.h> headers.
 */
#ifndef EM_ARCOMPACT
#define EM_ARCOMPACT 93
#endif

#ifndef EM_XTENSA
#define EM_XTENSA 94
#endif

#ifndef EM_AARCH64
#define EM_AARCH64 183
#endif

#ifndef EM_MICROBLAZE
#define EM_MICROBLAZE 189
#endif

#ifndef EM_ARCV2
#define EM_ARCV2 195
#endif

#ifndef EM_RISCV
#define EM_RISCV 243
#endif

#ifndef EM_LOONGARCH
#define EM_LOONGARCH 258
#endif
|
|
|
|
|
2025-01-05 16:22:17 +00:00
|
|
|
/*
 * Each ELF structure is viewed as a union of its 32-bit and 64-bit
 * variants; the correct member is chosen at runtime through the
 * accessor callbacks installed in struct elf_funcs.
 */

/* ELF file header (32/64-bit overlay) */
typedef union {
	Elf32_Ehdr e32;
	Elf64_Ehdr e64;
} Elf_Ehdr;

/* Section header (32/64-bit overlay) */
typedef union {
	Elf32_Shdr e32;
	Elf64_Shdr e64;
} Elf_Shdr;

/* Symbol table entry (32/64-bit overlay) */
typedef union {
	Elf32_Sym e32;
	Elf64_Sym e64;
} Elf_Sym;

/* Relocation entry with addend (32/64-bit overlay) */
typedef union {
	Elf32_Rela e32;
	Elf64_Rela e64;
} Elf_Rela;
|
|
|
|
|
2019-12-04 00:46:28 +00:00
|
|
|
/*
 * Byte-order aware accessors; set at runtime to the little- or
 * big-endian implementation matching the ELF file being processed.
 */
static uint32_t (*r)(const uint32_t *);		/* read 32-bit */
static uint16_t (*r2)(const uint16_t *);	/* read 16-bit */
static uint64_t (*r8)(const uint64_t *);	/* read 64-bit */
static void (*w)(uint32_t, uint32_t *);		/* write 32-bit */
static void (*w8)(uint64_t, uint64_t *);	/* write 64-bit */

/* Signature of a per-table sort routine (buffer, entry count/size) */
typedef void (*table_sort_t)(char *, int);
|
|
|
|
|
2025-01-10 12:54:59 +00:00
|
|
|
/*
 * Dispatch table of ELF accessors.  Filled in once the ELF class
 * (32- or 64-bit) is known so the rest of the code can stay
 * class-agnostic.
 */
static struct elf_funcs {
	/* comparator for exception table entries */
	int (*compare_extable)(const void *a, const void *b);
	/* ELF header field readers */
	uint64_t (*ehdr_shoff)(Elf_Ehdr *ehdr);
	uint16_t (*ehdr_shstrndx)(Elf_Ehdr *ehdr);
	uint16_t (*ehdr_shentsize)(Elf_Ehdr *ehdr);
	uint16_t (*ehdr_shnum)(Elf_Ehdr *ehdr);
	/* Section header field readers */
	uint64_t (*shdr_addr)(Elf_Shdr *shdr);
	uint64_t (*shdr_offset)(Elf_Shdr *shdr);
	uint64_t (*shdr_size)(Elf_Shdr *shdr);
	uint64_t (*shdr_entsize)(Elf_Shdr *shdr);
	uint32_t (*shdr_link)(Elf_Shdr *shdr);
	uint32_t (*shdr_name)(Elf_Shdr *shdr);
	uint32_t (*shdr_type)(Elf_Shdr *shdr);
	/* Symbol table entry readers */
	uint8_t (*sym_type)(Elf_Sym *sym);
	uint32_t (*sym_name)(Elf_Sym *sym);
	uint64_t (*sym_value)(Elf_Sym *sym);
	uint16_t (*sym_shndx)(Elf_Sym *sym);
	/* Relocation entry accessors */
	uint64_t (*rela_offset)(Elf_Rela *rela);
	uint64_t (*rela_info)(Elf_Rela *rela);
	uint64_t (*rela_addend)(Elf_Rela *rela);
	void (*rela_write_addend)(Elf_Rela *rela, uint64_t val);
} e;
|
|
|
|
|
2025-01-05 16:22:20 +00:00
|
|
|
/* e_shoff readers for each ELF class, plus the class-agnostic wrapper */
static uint64_t ehdr64_shoff(Elf_Ehdr *ehdr)
{
	return r8(&ehdr->e64.e_shoff);
}

static uint64_t ehdr32_shoff(Elf_Ehdr *ehdr)
{
	return r(&ehdr->e32.e_shoff);
}

static uint64_t ehdr_shoff(Elf_Ehdr *ehdr)
{
	return e.ehdr_shoff(ehdr);
}
|
|
|
|
|
2025-01-05 16:22:20 +00:00
|
|
|
/*
 * Generate 32-bit and 64-bit readers for a 16-bit (Elf_Half) ELF header
 * field, plus a class-agnostic wrapper dispatching through the e table.
 */
#define EHDR_HALF(fn_name)				\
static uint16_t ehdr64_##fn_name(Elf_Ehdr *ehdr)	\
{							\
	return r2(&ehdr->e64.e_##fn_name);		\
}							\
							\
static uint16_t ehdr32_##fn_name(Elf_Ehdr *ehdr)	\
{							\
	return r2(&ehdr->e32.e_##fn_name);		\
}							\
							\
static uint16_t ehdr_##fn_name(Elf_Ehdr *ehdr)		\
{							\
	return e.ehdr_##fn_name(ehdr);			\
}

EHDR_HALF(shentsize)
EHDR_HALF(shstrndx)
EHDR_HALF(shnum)
|
|
|
|
|
2025-01-05 16:22:21 +00:00
|
|
|
/*
 * Generate readers for a 32-bit (Elf_Word) section header field, plus a
 * class-agnostic wrapper dispatching through the e table.
 */
#define SHDR_WORD(fn_name)				\
static uint32_t shdr64_##fn_name(Elf_Shdr *shdr)	\
{							\
	return r(&shdr->e64.sh_##fn_name);		\
}							\
							\
static uint32_t shdr32_##fn_name(Elf_Shdr *shdr)	\
{							\
	return r(&shdr->e32.sh_##fn_name);		\
}							\
							\
static uint32_t shdr_##fn_name(Elf_Shdr *shdr)		\
{							\
	return e.shdr_##fn_name(shdr);			\
}
|
|
|
|
|
|
|
|
/*
 * Generate readers for an address/offset-sized section header field;
 * the 32-bit variant is widened to uint64_t so callers see one type.
 */
#define SHDR_ADDR(fn_name)				\
static uint64_t shdr64_##fn_name(Elf_Shdr *shdr)	\
{							\
	return r8(&shdr->e64.sh_##fn_name);		\
}							\
							\
static uint64_t shdr32_##fn_name(Elf_Shdr *shdr)	\
{							\
	return r(&shdr->e32.sh_##fn_name);		\
}							\
							\
static uint64_t shdr_##fn_name(Elf_Shdr *shdr)		\
{							\
	return e.shdr_##fn_name(shdr);			\
}
|
|
|
|
|
|
|
|
/*
 * A second, token-identical definition of SHDR_WORD appeared here.
 * Identical macro redefinition is legal C but redundant and confusing;
 * it has been removed — SHDR_WORD is already defined above.
 */
|
|
|
|
|
|
|
|
/* Address-sized section header fields */
SHDR_ADDR(addr)
SHDR_ADDR(offset)
SHDR_ADDR(size)
SHDR_ADDR(entsize)

/* Word-sized section header fields */
SHDR_WORD(link)
SHDR_WORD(name)
SHDR_WORD(type)
|
|
|
|
|
2025-01-05 16:22:22 +00:00
|
|
|
/*
 * Generate readers for an address-sized symbol table field; the 32-bit
 * variant is widened to uint64_t.
 */
#define SYM_ADDR(fn_name)			\
static uint64_t sym64_##fn_name(Elf_Sym *sym)	\
{						\
	return r8(&sym->e64.st_##fn_name);	\
}						\
						\
static uint64_t sym32_##fn_name(Elf_Sym *sym)	\
{						\
	return r(&sym->e32.st_##fn_name);	\
}						\
						\
static uint64_t sym_##fn_name(Elf_Sym *sym)	\
{						\
	return e.sym_##fn_name(sym);		\
}
|
|
|
|
|
|
|
|
/* Generate readers for a 32-bit (Elf_Word) symbol table field */
#define SYM_WORD(fn_name)			\
static uint32_t sym64_##fn_name(Elf_Sym *sym)	\
{						\
	return r(&sym->e64.st_##fn_name);	\
}						\
						\
static uint32_t sym32_##fn_name(Elf_Sym *sym)	\
{						\
	return r(&sym->e32.st_##fn_name);	\
}						\
						\
static uint32_t sym_##fn_name(Elf_Sym *sym)	\
{						\
	return e.sym_##fn_name(sym);		\
}
|
|
|
|
|
|
|
|
/* Generate readers for a 16-bit (Elf_Half) symbol table field */
#define SYM_HALF(fn_name)			\
static uint16_t sym64_##fn_name(Elf_Sym *sym)	\
{						\
	return r2(&sym->e64.st_##fn_name);	\
}						\
						\
static uint16_t sym32_##fn_name(Elf_Sym *sym)	\
{						\
	return r2(&sym->e32.st_##fn_name);	\
}						\
						\
static uint16_t sym_##fn_name(Elf_Sym *sym)	\
{						\
	return e.sym_##fn_name(sym);		\
}
|
|
|
|
|
|
|
|
/* Extract the symbol type (STT_*) from st_info, per ELF class */
static uint8_t sym64_type(Elf_Sym *sym)
{
	return ELF64_ST_TYPE(sym->e64.st_info);
}

static uint8_t sym32_type(Elf_Sym *sym)
{
	return ELF32_ST_TYPE(sym->e32.st_info);
}

/* Class-agnostic wrapper dispatching through the e table */
static uint8_t sym_type(Elf_Sym *sym)
{
	return e.sym_type(sym);
}
|
|
|
|
|
2025-01-05 16:22:22 +00:00
|
|
|
SYM_ADDR(value)
SYM_WORD(name)
SYM_HALF(shndx)

/* Silence unused-function warnings for generated accessors */
#define __maybe_unused __attribute__((__unused__))
|
|
|
|
|
|
|
|
/*
 * Generate readers for an Elf_Rela field, widened to uint64_t.  The
 * class-agnostic wrapper is __maybe_unused since not every build
 * configuration calls it.
 */
#define RELA_ADDR(fn_name)					\
static uint64_t rela64_##fn_name(Elf_Rela *rela)		\
{								\
	return r8((uint64_t *)&rela->e64.r_##fn_name);		\
}								\
								\
static uint64_t rela32_##fn_name(Elf_Rela *rela)		\
{								\
	return r((uint32_t *)&rela->e32.r_##fn_name);		\
}								\
								\
static uint64_t __maybe_unused rela_##fn_name(Elf_Rela *rela)	\
{								\
	return e.rela_##fn_name(rela);				\
}

RELA_ADDR(offset)
RELA_ADDR(info)
RELA_ADDR(addend)
|
|
|
|
|
|
|
|
/* Store a new r_addend, byte-swapped as needed for the target file */
static void rela64_write_addend(Elf_Rela *rela, uint64_t val)
{
	w8(val, (uint64_t *)&rela->e64.r_addend);
}

static void rela32_write_addend(Elf_Rela *rela, uint64_t val)
{
	/* 32-bit addend: value is truncated to 32 bits on write */
	w(val, (uint32_t *)&rela->e32.r_addend);
}
|
|
|
|
|
2012-04-19 21:59:55 +00:00
|
|
|
/*
 * Get the whole file as a programming convenience in order to avoid
 * malloc+lseek+read+free of many pieces.  If successful, then mmap
 * avoids copying unused pieces; else just read the whole file.
 * Open for both read and write.
 *
 * Returns the writable mapping on success, NULL on any failure
 * (after printing a diagnostic).  On success *size is set to the
 * file's length.
 */
static void *mmap_file(char const *fname, size_t *size)
{
	int fd;
	struct stat sb;
	void *addr = NULL;

	fd = open(fname, O_RDWR);
	if (fd < 0) {
		perror(fname);
		return NULL;
	}
	if (fstat(fd, &sb) < 0) {
		perror(fname);
		goto out;
	}
	if (!S_ISREG(sb.st_mode)) {
		fprintf(stderr, "not a regular file: %s\n", fname);
		goto out;
	}

	/* MAP_SHARED so the sort results are written back to the file */
	addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		fprintf(stderr, "Could not mmap file: %s\n", fname);
		/*
		 * Report failure as NULL like the other error paths;
		 * otherwise MAP_FAILED ((void *)-1) leaks to callers
		 * that only check for NULL.
		 */
		addr = NULL;
		goto out;
	}

	*size = sb.st_size;

out:
	/* Closing the descriptor does not invalidate the mapping */
	close(fd);
	return addr;
}
|
|
|
|
|
2012-04-24 18:23:14 +00:00
|
|
|
/* Big-endian readers (unaligned-safe, via tools/be_byteshift.h) */
static uint32_t rbe(const uint32_t *x)
{
	return get_unaligned_be32(x);
}

static uint16_t r2be(const uint16_t *x)
{
	return get_unaligned_be16(x);
}

static uint64_t r8be(const uint64_t *x)
{
	return get_unaligned_be64(x);
}

/* Little-endian readers */
static uint32_t rle(const uint32_t *x)
{
	return get_unaligned_le32(x);
}

static uint16_t r2le(const uint16_t *x)
{
	return get_unaligned_le16(x);
}

static uint64_t r8le(const uint64_t *x)
{
	return get_unaligned_le64(x);
}

/* Writers, matching the readers above */
static void wbe(uint32_t val, uint32_t *x)
{
	put_unaligned_be32(val, x);
}

static void wle(uint32_t val, uint32_t *x)
{
	put_unaligned_le32(val, x);
}

static void w8be(uint64_t val, uint64_t *x)
{
	put_unaligned_be64(val, x);
}

static void w8le(uint64_t val, uint64_t *x)
{
	put_unaligned_le64(val, x);
}
|
|
|
|
|
2013-11-12 23:06:51 +00:00
|
|
|
/*
 * Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of
 * the way to -256..-1, to avoid conflicting with real section
 * indices.
 */
#define SPECIAL(i) ((i) - (SHN_HIRESERVE + 1))
|
|
|
|
|
|
|
|
/*
 * True for reserved section indices (SHN_LORESERVE..SHN_HIRESERVE),
 * except SHN_XINDEX which signals an extended-index lookup.
 */
static inline int is_shndx_special(unsigned int i)
{
	if (i == SHN_XINDEX)
		return 0;
	return (SHN_LORESERVE <= i) && (i <= SHN_HIRESERVE);
}
|
|
|
|
|
|
|
|
/* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
static inline unsigned int get_secindex(unsigned int shndx,
					unsigned int sym_offs,
					const Elf32_Word *symtab_shndx_start)
{
	/* Remap reserved indices so they cannot clash with real ones */
	if (is_shndx_special(shndx))
		return SPECIAL(shndx);
	if (shndx != SHN_XINDEX)
		return shndx;
	/* SHN_XINDEX: real index lives in the extended-index table */
	return r(&symtab_shndx_start[sym_offs]);
}
|
|
|
|
|
2025-01-05 16:22:16 +00:00
|
|
|
/* qsort comparator for 32-bit extable entries: order by first word */
static int compare_extable_32(const void *a, const void *b)
{
	Elf32_Addr left = r(a);
	Elf32_Addr right = r(b);

	if (left == right)
		return 0;
	return (left < right) ? -1 : 1;
}

/* qsort comparator for 64-bit extable entries: order by first dword */
static int compare_extable_64(const void *a, const void *b)
{
	Elf64_Addr left = r8(a);
	Elf64_Addr right = r8(b);

	if (left == right)
		return 0;
	return (left < right) ? -1 : 1;
}
|
|
|
|
|
2025-01-10 12:54:59 +00:00
|
|
|
/* Class-agnostic extable comparator, dispatching through the e table */
static int compare_extable(const void *a, const void *b)
{
	return e.compare_extable(a, b);
}
|
|
|
|
|
2025-01-05 16:22:18 +00:00
|
|
|
/*
 * Return a pointer to table entry @index, given entries of @entsize
 * bytes starting at @start.  The char * cast makes the arithmetic
 * well-defined standard C (arithmetic on void * is a GNU extension).
 */
static inline void *get_index(void *start, int entsize, int index)
{
	return (char *)start + (entsize * index);
}
|
|
|
|
|
2025-01-08 03:32:17 +00:00
|
|
|
/* Byte size of one exception table entry in the file being sorted */
static int extable_ent_size;
/* Target's sizeof(long): 4 or 8 depending on ELF class */
static int long_size;

/* Size of the error-message buffers (e.g. g_err) */
#define ERRSTR_MAXSZ 256
|
2025-01-08 03:32:17 +00:00
|
|
|
|
|
|
|
#ifdef UNWINDER_ORC_ENABLED
/* ORC unwinder only support X86_64 */
#include <asm/orc_types.h>

/* Error message handed back from the ORC sort thread via pthread_exit() */
static char g_err[ERRSTR_MAXSZ];
/* The .orc_unwind_ip table: IP-relative offsets, one per ORC entry */
static int *g_orc_ip_table;
/* The .orc_unwind table, indexed in parallel with g_orc_ip_table */
static struct orc_entry *g_orc_table;

/* Worker thread running sort_orctable() */
static pthread_t orc_sort_thread;
|
|
|
/* Resolve an IP-relative ORC table entry to an absolute address */
static inline unsigned long orc_ip(const int *ip)
{
	unsigned long base = (unsigned long)ip;

	return base + *ip;
}
|
|
|
|
|
|
|
|
/*
 * qsort comparator for ORC entries.  _a and _b are indices into
 * g_orc_ip_table; entries are ordered by the absolute instruction
 * address their IP-relative offset resolves to.
 */
static int orc_sort_cmp(const void *_a, const void *_b)
{
	struct orc_entry *orc_a, *orc_b;
	const int *a = g_orc_ip_table + *(int *)_a;
	const int *b = g_orc_ip_table + *(int *)_b;
	unsigned long a_val = orc_ip(a);
	unsigned long b_val = orc_ip(b);

	if (a_val > b_val)
		return 1;
	if (a_val < b_val)
		return -1;

	/*
	 * The "weak" section terminator entries need to always be on the left
	 * to ensure the lookup code skips them in favor of real entries.
	 * These terminator entries exist to handle any gaps created by
	 * whitelisted .o files which didn't get objtool generation.
	 */
	orc_a = g_orc_table + (a - g_orc_ip_table);
	orc_b = g_orc_table + (b - g_orc_ip_table);
	if (orc_a->type == ORC_TYPE_UNDEFINED && orc_b->type == ORC_TYPE_UNDEFINED)
		return 0;
	return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
}
|
|
|
|
|
|
|
|
/*
 * Thread entry point: sort the ORC ip/entry tables in place.
 * @arg points to the byte size of the ip table.  Exits the thread with
 * NULL on success, or with a pointer to an error string (g_err) on
 * allocation failure.
 */
static void *sort_orctable(void *arg)
{
	int i;
	int *idxs = NULL;
	int *tmp_orc_ip_table = NULL;
	struct orc_entry *tmp_orc_table = NULL;
	unsigned int *orc_ip_size = (unsigned int *)arg;
	unsigned int num_entries = *orc_ip_size / sizeof(int);
	unsigned int orc_size = num_entries * sizeof(struct orc_entry);

	idxs = (int *)malloc(*orc_ip_size);
	if (!idxs) {
		snprintf(g_err, ERRSTR_MAXSZ, "malloc idxs: %s",
			 strerror(errno));
		pthread_exit(g_err);
	}

	tmp_orc_ip_table = (int *)malloc(*orc_ip_size);
	if (!tmp_orc_ip_table) {
		snprintf(g_err, ERRSTR_MAXSZ, "malloc tmp_orc_ip_table: %s",
			 strerror(errno));
		pthread_exit(g_err);
	}

	tmp_orc_table = (struct orc_entry *)malloc(orc_size);
	if (!tmp_orc_table) {
		snprintf(g_err, ERRSTR_MAXSZ, "malloc tmp_orc_table: %s",
			 strerror(errno));
		pthread_exit(g_err);
	}

	/* initialize indices array, convert ip_table to absolute address */
	for (i = 0; i < num_entries; i++) {
		idxs[i] = i;
		tmp_orc_ip_table[i] = g_orc_ip_table[i] + i * sizeof(int);
	}
	/* keep a pristine copy so the permutation can be applied afterwards */
	memcpy(tmp_orc_table, g_orc_table, orc_size);

	/* sort an index array rather than moving the entries themselves */
	qsort(idxs, num_entries, sizeof(int), orc_sort_cmp);

	for (i = 0; i < num_entries; i++) {
		/* entry already in its sorted position */
		if (idxs[i] == i)
			continue;

		/* convert back to relative address */
		g_orc_ip_table[i] = tmp_orc_ip_table[idxs[i]] - i * sizeof(int);
		g_orc_table[i] = tmp_orc_table[idxs[i]];
	}

	free(idxs);
	free(tmp_orc_ip_table);
	free(tmp_orc_table);
	pthread_exit(NULL);
}
#endif
|
|
|
|
|
|
|
|
#ifdef MCOUNT_SORT_ENABLED
|
2025-02-18 19:59:19 +00:00
|
|
|
|
2025-02-18 19:59:20 +00:00
|
|
|
static int compare_values_64(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
uint64_t av = *(uint64_t *)a;
|
|
|
|
uint64_t bv = *(uint64_t *)b;
|
|
|
|
|
|
|
|
if (av < bv)
|
|
|
|
return -1;
|
|
|
|
return av > bv;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int compare_values_32(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
uint32_t av = *(uint32_t *)a;
|
|
|
|
uint32_t bv = *(uint32_t *)b;
|
|
|
|
|
|
|
|
if (av < bv)
|
|
|
|
return -1;
|
|
|
|
return av > bv;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * NOTE(review): set at runtime — presumably to compare_values_64 or
 * compare_values_32 depending on the ELF class; confirm in the callers.
 */
static int (*compare_values)(const void *a, const void *b);
|
|
|
|
|
2025-02-18 19:59:19 +00:00
|
|
|
/* Only used for sorting mcount table */
/* Class-agnostic addend writer, dispatching through the e table */
static void rela_write_addend(Elf_Rela *rela, uint64_t val)
{
	e.rela_write_addend(rela, val);
}
|
|
|
|
|
scripts/sorttable: Zero out weak functions in mcount_loc table
When a function is annotated as "weak" and is overridden, the code is not
removed. If it is traced, the fentry/mcount location in the weak function
will be referenced by the "__mcount_loc" section. This will then be added
to the available_filter_functions list. Since only the address of the
functions are listed, to find the name to show, a search of kallsyms is
used.
Since kallsyms will return the function by simply finding the function
that the address is after but before the next function, an address of a
weak function will show up as the function before it. This is because
kallsyms does not save names of weak functions. This has caused issues in
the past, as now the traced weak function will be listed in
available_filter_functions with the name of the function before it.
At best, this will cause the previous function's name to be listed twice.
At worse, if the previous function was marked notrace, it will now show up
as a function that can be traced. Note that it only shows up that it can
be traced but will not be if enabled, which causes confusion.
https://lore.kernel.org/all/20220412094923.0abe90955e5db486b7bca279@kernel.org/
The commit b39181f7c6907 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid
adding weak function") was a workaround to this by checking the function
address before printing its name. If the address was too far from the
function given by the name then instead of printing the name it would
print: __ftrace_invalid_address___<invalid-offset>
The real issue is that these invalid addresses are listed in the ftrace
table look up which available_filter_functions is derived from. A place
holder must be listed in that file because set_ftrace_filter may take a
series of indexes into that file instead of names to be able to do O(1)
lookups to enable filtering (many tools use this method).
Even if kallsyms saved the size of the function, it does not remove the
need of having these place holders. The real solution is to not add a weak
function into the ftrace table in the first place.
To solve this, the sorttable.c code that sorts the mcount regions during
the build is modified to take a "nm -S vmlinux" input, sort it, and any
function listed in the mcount_loc section that is not within a boundary of
the function list given by nm is considered a weak function and is zeroed
out.
Note, this does not mean they will remain zero when booting as KASLR
will still shift those addresses. To handle this, the entries in the
mcount_loc section will be ignored if they are zero or match the
kaslr_offset() value.
Before:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
551
After:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
0
Cc: bpf <bpf@vger.kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Zheng Yejian <zhengyejian1@huawei.com>
Cc: Martin Kelly <martin.kelly@crowdstrike.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Link: https://lore.kernel.org/20250218200022.883095980@goodmis.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-02-18 19:59:22 +00:00
|
|
|
/* One function's start address and byte size, parsed from "nm -S" output */
struct func_info {
	uint64_t addr;
	uint64_t size;
};
|
|
|
|
|
|
|
|
/* List of functions created by: nm -S vmlinux */
static struct func_info *function_list;
/* Number of entries currently stored in function_list */
static int function_list_size;

/* Allocate functions in 1k blocks */
#define FUNC_BLK_SIZE 1024
#define FUNC_BLK_MASK (FUNC_BLK_SIZE - 1)
|
|
|
|
|
|
|
|
static int add_field(uint64_t addr, uint64_t size)
|
|
|
|
{
|
|
|
|
struct func_info *fi;
|
|
|
|
int fsize = function_list_size;
|
|
|
|
|
|
|
|
if (!(fsize & FUNC_BLK_MASK)) {
|
|
|
|
fsize += FUNC_BLK_SIZE;
|
|
|
|
fi = realloc(function_list, fsize * sizeof(struct func_info));
|
|
|
|
if (!fi)
|
|
|
|
return -1;
|
|
|
|
function_list = fi;
|
|
|
|
}
|
|
|
|
fi = &function_list[function_list_size++];
|
|
|
|
fi->addr = addr;
|
|
|
|
fi->size = size;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2025-02-25 18:20:08 +00:00
|
|
|
/*
 * Used for when mcount/fentry is before the function entry.  Widens the
 * lower bound of the address match in cmp_func_addr() by this many bytes.
 */
static int before_func;
|
|
|
|
|
scripts/sorttable: Zero out weak functions in mcount_loc table
When a function is annotated as "weak" and is overridden, the code is not
removed. If it is traced, the fentry/mcount location in the weak function
will be referenced by the "__mcount_loc" section. This will then be added
to the available_filter_functions list. Since only the address of the
functions are listed, to find the name to show, a search of kallsyms is
used.
Since kallsyms will return the function by simply finding the function
that the address is after but before the next function, an address of a
weak function will show up as the function before it. This is because
kallsyms does not save names of weak functions. This has caused issues in
the past, as now the traced weak function will be listed in
available_filter_functions with the name of the function before it.
At best, this will cause the previous function's name to be listed twice.
At worse, if the previous function was marked notrace, it will now show up
as a function that can be traced. Note that it only shows up that it can
be traced but will not be if enabled, which causes confusion.
https://lore.kernel.org/all/20220412094923.0abe90955e5db486b7bca279@kernel.org/
The commit b39181f7c6907 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid
adding weak function") was a workaround to this by checking the function
address before printing its name. If the address was too far from the
function given by the name then instead of printing the name it would
print: __ftrace_invalid_address___<invalid-offset>
The real issue is that these invalid addresses are listed in the ftrace
table look up which available_filter_functions is derived from. A place
holder must be listed in that file because set_ftrace_filter may take a
series of indexes into that file instead of names to be able to do O(1)
lookups to enable filtering (many tools use this method).
Even if kallsyms saved the size of the function, it does not remove the
need of having these place holders. The real solution is to not add a weak
function into the ftrace table in the first place.
To solve this, the sorttable.c code that sorts the mcount regions during
the build is modified to take a "nm -S vmlinux" input, sort it, and any
function listed in the mcount_loc section that is not within a boundary of
the function list given by nm is considered a weak function and is zeroed
out.
Note, this does not mean they will remain zero when booting as KASLR
will still shift those addresses. To handle this, the entries in the
mcount_loc section will be ignored if they are zero or match the
kaslr_offset() value.
Before:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
551
After:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
0
Cc: bpf <bpf@vger.kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Zheng Yejian <zhengyejian1@huawei.com>
Cc: Martin Kelly <martin.kelly@crowdstrike.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Link: https://lore.kernel.org/20250218200022.883095980@goodmis.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-02-18 19:59:22 +00:00
|
|
|
/* Only return match if the address lies inside the function size */
|
|
|
|
static int cmp_func_addr(const void *K, const void *A)
|
|
|
|
{
|
|
|
|
uint64_t key = *(const uint64_t *)K;
|
|
|
|
const struct func_info *a = A;
|
|
|
|
|
2025-02-25 18:20:08 +00:00
|
|
|
if (key + before_func < a->addr)
|
scripts/sorttable: Zero out weak functions in mcount_loc table
When a function is annotated as "weak" and is overridden, the code is not
removed. If it is traced, the fentry/mcount location in the weak function
will be referenced by the "__mcount_loc" section. This will then be added
to the available_filter_functions list. Since only the address of the
functions are listed, to find the name to show, a search of kallsyms is
used.
Since kallsyms will return the function by simply finding the function
that the address is after but before the next function, an address of a
weak function will show up as the function before it. This is because
kallsyms does not save names of weak functions. This has caused issues in
the past, as now the traced weak function will be listed in
available_filter_functions with the name of the function before it.
At best, this will cause the previous function's name to be listed twice.
At worse, if the previous function was marked notrace, it will now show up
as a function that can be traced. Note that it only shows up that it can
be traced but will not be if enabled, which causes confusion.
https://lore.kernel.org/all/20220412094923.0abe90955e5db486b7bca279@kernel.org/
The commit b39181f7c6907 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid
adding weak function") was a workaround to this by checking the function
address before printing its name. If the address was too far from the
function given by the name then instead of printing the name it would
print: __ftrace_invalid_address___<invalid-offset>
The real issue is that these invalid addresses are listed in the ftrace
table look up which available_filter_functions is derived from. A place
holder must be listed in that file because set_ftrace_filter may take a
series of indexes into that file instead of names to be able to do O(1)
lookups to enable filtering (many tools use this method).
Even if kallsyms saved the size of the function, it does not remove the
need of having these place holders. The real solution is to not add a weak
function into the ftrace table in the first place.
To solve this, the sorttable.c code that sorts the mcount regions during
the build is modified to take a "nm -S vmlinux" input, sort it, and any
function listed in the mcount_loc section that is not within a boundary of
the function list given by nm is considered a weak function and is zeroed
out.
Note, this does not mean they will remain zero when booting as KASLR
will still shift those addresses. To handle this, the entries in the
mcount_loc section will be ignored if they are zero or match the
kaslr_offset() value.
Before:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
551
After:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
0
Cc: bpf <bpf@vger.kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Zheng Yejian <zhengyejian1@huawei.com>
Cc: Martin Kelly <martin.kelly@crowdstrike.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Link: https://lore.kernel.org/20250218200022.883095980@goodmis.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-02-18 19:59:22 +00:00
|
|
|
return -1;
|
|
|
|
return key >= a->addr + a->size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Find the function in function list that is bounded by the function size */
|
|
|
|
static int find_func(uint64_t key)
|
|
|
|
{
|
|
|
|
return bsearch(&key, function_list, function_list_size,
|
|
|
|
sizeof(struct func_info), cmp_func_addr) != NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_funcs(const void *A, const void *B)
|
|
|
|
{
|
|
|
|
const struct func_info *a = A;
|
|
|
|
const struct func_info *b = B;
|
|
|
|
|
|
|
|
if (a->addr < b->addr)
|
|
|
|
return -1;
|
|
|
|
return a->addr > b->addr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_symbols(const char *fname)
|
|
|
|
{
|
|
|
|
FILE *fp;
|
|
|
|
char addr_str[20]; /* Only need 17, but round up to next int size */
|
|
|
|
char size_str[20];
|
|
|
|
char type;
|
|
|
|
|
|
|
|
fp = fopen(fname, "r");
|
|
|
|
if (!fp) {
|
|
|
|
perror(fname);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (fscanf(fp, "%16s %16s %c %*s\n", addr_str, size_str, &type) == 3) {
|
|
|
|
uint64_t addr;
|
|
|
|
uint64_t size;
|
|
|
|
|
|
|
|
/* Only care about functions */
|
|
|
|
if (type != 't' && type != 'T' && type != 'W')
|
|
|
|
continue;
|
|
|
|
|
|
|
|
addr = strtoull(addr_str, NULL, 16);
|
|
|
|
size = strtoull(size_str, NULL, 16);
|
|
|
|
if (add_field(addr, size) < 0)
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
fclose(fp);
|
|
|
|
|
|
|
|
qsort(function_list, function_list_size, sizeof(struct func_info), cmp_funcs);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2025-01-08 03:32:17 +00:00
|
|
|
static pthread_t mcount_sort_thread;
|
2025-02-18 19:59:19 +00:00
|
|
|
static bool sort_reloc;
|
|
|
|
|
|
|
|
static long rela_type;
|
|
|
|
|
|
|
|
static char m_err[ERRSTR_MAXSZ];
|
2025-01-08 03:32:17 +00:00
|
|
|
|
|
|
|
/* Context handed to the mcount_loc sorting thread (sort_mcount_loc) */
struct elf_mcount_loc {
	Elf_Ehdr *ehdr;			/* mapped ELF image being modified in place */
	Elf_Shdr *init_data_sec;	/* section containing the mcount_loc table */
	uint64_t start_mcount_loc;	/* value of the __start_mcount_loc symbol */
	uint64_t stop_mcount_loc;	/* value of the __stop_mcount_loc symbol */
};
|
|
|
|
|
2025-02-18 19:59:21 +00:00
|
|
|
/* Fill the array with the content of the relocs */
/*
 * Walk every SHT_RELA section of the image and, for each relocation whose
 * r_offset falls inside [start_loc, start_loc + size), copy its addend into
 * the sort array at @ptr (4 or 8 bytes per entry, per long_size).
 *
 * Returns the number of entries copied, or -1 with m_err filled on error
 * (array overflow or an unexpected relocation type).
 */
static int fill_relocs(void *ptr, uint64_t size, Elf_Ehdr *ehdr, uint64_t start_loc)
{
	Elf_Shdr *shdr_start;
	Elf_Rela *rel;
	unsigned int shnum;
	unsigned int count = 0;
	int shentsize;
	void *array_end = ptr + size;	/* one past the end of the sort array */

	shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr));
	shentsize = ehdr_shentsize(ehdr);

	shnum = ehdr_shnum(ehdr);
	/* e_shnum == SHN_UNDEF: real count is in the first section's sh_size */
	if (shnum == SHN_UNDEF)
		shnum = shdr_size(shdr_start);

	for (int i = 0; i < shnum; i++) {
		Elf_Shdr *shdr = get_index(shdr_start, shentsize, i);
		void *end;

		if (shdr_type(shdr) != SHT_RELA)
			continue;

		rel = (void *)ehdr + shdr_offset(shdr);
		end = (void *)rel + shdr_size(shdr);

		for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) {
			uint64_t offset = rela_offset(rel);

			/* Only relocations targeting the mcount_loc range */
			if (offset >= start_loc && offset < start_loc + size) {
				if (ptr + long_size > array_end) {
					snprintf(m_err, ERRSTR_MAXSZ,
						"Too many relocations");
					return -1;
				}

				/* Make sure this has the correct type */
				if (rela_info(rel) != rela_type) {
					snprintf(m_err, ERRSTR_MAXSZ,
						"rela has type %lx but expected %lx\n",
						(long)rela_info(rel), rela_type);
					return -1;
				}

				if (long_size == 4)
					*(uint32_t *)ptr = rela_addend(rel);
				else
					*(uint64_t *)ptr = rela_addend(rel);
				ptr += long_size;
				count++;
			}
		}
	}
	return count;
}
|
|
|
|
|
|
|
|
/* Put the sorted vals back into the relocation elements */
/*
 * Inverse of fill_relocs(): walk the SHT_RELA sections in the same order and
 * write each sorted value from @ptr back as the relocation's addend.  Must
 * visit relocations in the identical order fill_relocs() read them, since
 * @ptr is consumed sequentially.
 */
static void replace_relocs(void *ptr, uint64_t size, Elf_Ehdr *ehdr, uint64_t start_loc)
{
	Elf_Shdr *shdr_start;
	Elf_Rela *rel;
	unsigned int shnum;
	int shentsize;

	shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr));
	shentsize = ehdr_shentsize(ehdr);

	shnum = ehdr_shnum(ehdr);
	/* e_shnum == SHN_UNDEF: real count is in the first section's sh_size */
	if (shnum == SHN_UNDEF)
		shnum = shdr_size(shdr_start);

	for (int i = 0; i < shnum; i++) {
		Elf_Shdr *shdr = get_index(shdr_start, shentsize, i);
		void *end;

		if (shdr_type(shdr) != SHT_RELA)
			continue;

		rel = (void *)ehdr + shdr_offset(shdr);
		end = (void *)rel + shdr_size(shdr);

		for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) {
			uint64_t offset = rela_offset(rel);

			/* Only relocations targeting the mcount_loc range */
			if (offset >= start_loc && offset < start_loc + size) {
				if (long_size == 4)
					rela_write_addend(rel, *(uint32_t *)ptr);
				else
					rela_write_addend(rel, *(uint64_t *)ptr);
				ptr += long_size;
			}
		}
	}
}
|
|
|
|
|
|
|
|
static int fill_addrs(void *ptr, uint64_t size, void *addrs)
|
|
|
|
{
|
|
|
|
void *end = ptr + size;
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
for (; ptr < end; ptr += long_size, addrs += long_size, count++) {
|
|
|
|
if (long_size == 4)
|
|
|
|
*(uint32_t *)ptr = r(addrs);
|
|
|
|
else
|
|
|
|
*(uint64_t *)ptr = r8(addrs);
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void replace_addrs(void *ptr, uint64_t size, void *addrs)
|
|
|
|
{
|
|
|
|
void *end = ptr + size;
|
|
|
|
|
|
|
|
for (; ptr < end; ptr += long_size, addrs += long_size) {
|
|
|
|
if (long_size == 4)
|
|
|
|
w(*(uint32_t *)ptr, addrs);
|
|
|
|
else
|
|
|
|
w8(*(uint64_t *)ptr, addrs);
|
|
|
|
}
|
2025-02-18 19:59:19 +00:00
|
|
|
}
|
|
|
|
|
2025-01-08 03:32:17 +00:00
|
|
|
/* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */
/*
 * Thread entry point (see pthread_create in do_sort).  Exits via
 * pthread_exit() with NULL on success or a pointer to m_err on failure;
 * the joiner treats any non-NULL retval as an error string.
 */
static void *sort_mcount_loc(void *arg)
{
	struct elf_mcount_loc *emloc = (struct elf_mcount_loc *)arg;
	/* File offset of the table: section offset + (vaddr delta into section) */
	uint64_t offset = emloc->start_mcount_loc - shdr_addr(emloc->init_data_sec)
					+ shdr_offset(emloc->init_data_sec);
	uint64_t size = emloc->stop_mcount_loc - emloc->start_mcount_loc;
	unsigned char *start_loc = (void *)emloc->ehdr + offset;
	Elf_Ehdr *ehdr = emloc->ehdr;
	void *e_msg = NULL;
	void *vals;
	int count;

	vals = malloc(long_size * size);
	if (!vals) {
		snprintf(m_err, ERRSTR_MAXSZ, "Failed to allocate sort array");
		pthread_exit(m_err);
	}

	if (sort_reloc) {
		count = fill_relocs(vals, size, ehdr, emloc->start_mcount_loc);
		/* gcc may use relocs to save the addresses, but clang does not. */
		if (!count) {
			/* No relocs found: fall back to raw addresses */
			count = fill_addrs(vals, size, start_loc);
			sort_reloc = 0;
		}
	} else
		count = fill_addrs(vals, size, start_loc);

	if (count < 0) {
		/* fill_relocs() already formatted m_err */
		e_msg = m_err;
		goto out;
	}

	/* Sanity check: one value per mcount_loc slot */
	if (count != size / long_size) {
		snprintf(m_err, ERRSTR_MAXSZ, "Expected %u mcount elements but found %u\n",
			(int)(size / long_size), count);
		e_msg = m_err;
		goto out;
	}

	/* zero out any locations not found by function list */
	if (function_list_size) {
		for (void *ptr = vals; ptr < vals + size; ptr += long_size) {
			uint64_t key;

			key = long_size == 4 ? r((uint32_t *)ptr) : r8((uint64_t *)ptr);
			/* Entries outside any known function are weak-function leftovers */
			if (!find_func(key)) {
				if (long_size == 4)
					*(uint32_t *)ptr = 0;
				else
					*(uint64_t *)ptr = 0;
			}
		}
	}

	/* Pick the comparator matching the word size of the target */
	compare_values = long_size == 4 ? compare_values_32 : compare_values_64;

	qsort(vals, count, long_size, compare_values);

	/* Write results back the same way they were read */
	if (sort_reloc)
		replace_relocs(vals, size, ehdr, emloc->start_mcount_loc);
	else
		replace_addrs(vals, size, start_loc);

out:
	free(vals);

	pthread_exit(e_msg);
}
|
|
|
|
|
|
|
|
/* Get the address of __start_mcount_loc and __stop_mcount_loc in System.map */
|
2025-01-05 16:22:25 +00:00
|
|
|
static void get_mcount_loc(struct elf_mcount_loc *emloc, Elf_Shdr *symtab_sec,
|
|
|
|
const char *strtab)
|
2025-01-08 03:32:17 +00:00
|
|
|
{
|
2025-01-05 16:22:25 +00:00
|
|
|
Elf_Sym *sym, *end_sym;
|
|
|
|
int symentsize = shdr_entsize(symtab_sec);
|
|
|
|
int found = 0;
|
|
|
|
|
|
|
|
sym = (void *)emloc->ehdr + shdr_offset(symtab_sec);
|
|
|
|
end_sym = (void *)sym + shdr_size(symtab_sec);
|
|
|
|
|
|
|
|
while (sym < end_sym) {
|
|
|
|
if (!strcmp(strtab + sym_name(sym), "__start_mcount_loc")) {
|
|
|
|
emloc->start_mcount_loc = sym_value(sym);
|
|
|
|
if (++found == 2)
|
|
|
|
break;
|
|
|
|
} else if (!strcmp(strtab + sym_name(sym), "__stop_mcount_loc")) {
|
|
|
|
emloc->stop_mcount_loc = sym_value(sym);
|
|
|
|
if (++found == 2)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
sym = (void *)sym + symentsize;
|
|
|
|
}
|
2025-01-08 03:32:17 +00:00
|
|
|
|
2025-01-05 16:22:25 +00:00
|
|
|
if (!emloc->start_mcount_loc) {
|
2025-01-08 03:32:17 +00:00
|
|
|
fprintf(stderr, "get start_mcount_loc error!");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2025-01-05 16:22:25 +00:00
|
|
|
if (!emloc->stop_mcount_loc) {
|
2025-01-08 03:32:17 +00:00
|
|
|
fprintf(stderr, "get stop_mcount_loc error!");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
scripts/sorttable: Zero out weak functions in mcount_loc table
When a function is annotated as "weak" and is overridden, the code is not
removed. If it is traced, the fentry/mcount location in the weak function
will be referenced by the "__mcount_loc" section. This will then be added
to the available_filter_functions list. Since only the address of the
functions are listed, to find the name to show, a search of kallsyms is
used.
Since kallsyms will return the function by simply finding the function
that the address is after but before the next function, an address of a
weak function will show up as the function before it. This is because
kallsyms does not save names of weak functions. This has caused issues in
the past, as now the traced weak function will be listed in
available_filter_functions with the name of the function before it.
At best, this will cause the previous function's name to be listed twice.
At worst, if the previous function was marked notrace, it will now show up
as a function that can be traced. Note that it only shows up that it can
be traced but will not be if enabled, which causes confusion.
https://lore.kernel.org/all/20220412094923.0abe90955e5db486b7bca279@kernel.org/
The commit b39181f7c6907 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid
adding weak function") was a workaround to this by checking the function
address before printing its name. If the address was too far from the
function given by the name then instead of printing the name it would
print: __ftrace_invalid_address___<invalid-offset>
The real issue is that these invalid addresses are listed in the ftrace
table look up which available_filter_functions is derived from. A place
holder must be listed in that file because set_ftrace_filter may take a
series of indexes into that file instead of names to be able to do O(1)
lookups to enable filtering (many tools use this method).
Even if kallsyms saved the size of the function, it does not remove the
need of having these place holders. The real solution is to not add a weak
function into the ftrace table in the first place.
To solve this, the sorttable.c code that sorts the mcount regions during
the build is modified to take a "nm -S vmlinux" input, sort it, and any
function listed in the mcount_loc section that is not within a boundary of
the function list given by nm is considered a weak function and is zeroed
out.
Note, this does not mean they will remain zero when booting as KASLR
will still shift those addresses. To handle this, the entries in the
mcount_loc section will be ignored if they are zero or match the
kaslr_offset() value.
Before:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
551
After:
~# grep __ftrace_invalid_address___ /sys/kernel/tracing/available_filter_functions | wc -l
0
Cc: bpf <bpf@vger.kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Zheng Yejian <zhengyejian1@huawei.com>
Cc: Martin Kelly <martin.kelly@crowdstrike.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Link: https://lore.kernel.org/20250218200022.883095980@goodmis.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-02-18 19:59:22 +00:00
|
|
|
#else /* MCOUNT_SORT_ENABLED */
/* mcount sorting disabled: accept and ignore the nm symbol file */
static inline int parse_symbols(const char *fname) { return 0; }
#endif
|
2025-01-05 16:22:25 +00:00
|
|
|
|
2025-01-08 03:32:17 +00:00
|
|
|
/*
 * Sort the __ex_table section of the mapped ELF image in place, spawning
 * side threads to sort the ORC unwind tables and the mcount_loc table when
 * those features are compiled in, then clear the kernel's
 * main_extable_sort_needed flag inside the image.
 *
 * @ehdr:        mapped ELF header of the file being modified
 * @fname:       file name, used only for error messages
 * @custom_sort: arch-specific extable sort routine, or NULL to qsort()
 *               fixed-size entries with compare_extable
 *
 * Returns 0 on success, non-zero on failure.
 */
static int do_sort(Elf_Ehdr *ehdr,
		   char const *const fname,
		   table_sort_t custom_sort)
{
	int rc = -1;
	Elf_Shdr *shdr_start;
	Elf_Shdr *strtab_sec = NULL;
	Elf_Shdr *symtab_sec = NULL;
	Elf_Shdr *extab_sec = NULL;
	Elf_Shdr *string_sec;
	Elf_Sym *sym;
	const Elf_Sym *symtab;
	Elf32_Word *symtab_shndx = NULL;
	Elf_Sym *sort_needed_sym = NULL;
	Elf_Shdr *sort_needed_sec;
	uint32_t *sort_needed_loc;
	void *sym_start;
	void *sym_end;
	const char *secstrings;
	const char *strtab;
	char *extab_image;
	int sort_need_index;
	int symentsize;
	int shentsize;
	int idx;
	int i;
	unsigned int shnum;
	unsigned int shstrndx;
#ifdef MCOUNT_SORT_ENABLED
	struct elf_mcount_loc mstruct = {0};
#endif
#ifdef UNWINDER_ORC_ENABLED
	unsigned int orc_ip_size = 0;
	unsigned int orc_size = 0;
	unsigned int orc_num_entries = 0;
#endif

	shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr));
	shentsize = ehdr_shentsize(ehdr);

	/* SHN_XINDEX: real shstrndx is stored in the first section's sh_link */
	shstrndx = ehdr_shstrndx(ehdr);
	if (shstrndx == SHN_XINDEX)
		shstrndx = shdr_link(shdr_start);
	string_sec = get_index(shdr_start, shentsize, shstrndx);
	secstrings = (const char *)ehdr + shdr_offset(string_sec);

	/* e_shnum == SHN_UNDEF: real count is in the first section's sh_size */
	shnum = ehdr_shnum(ehdr);
	if (shnum == SHN_UNDEF)
		shnum = shdr_size(shdr_start);

	/* One pass over all sections to locate everything we need */
	for (i = 0; i < shnum; i++) {
		Elf_Shdr *shdr = get_index(shdr_start, shentsize, i);

		idx = shdr_name(shdr);
		if (!strcmp(secstrings + idx, "__ex_table"))
			extab_sec = shdr;
		if (!strcmp(secstrings + idx, ".symtab"))
			symtab_sec = shdr;
		if (!strcmp(secstrings + idx, ".strtab"))
			strtab_sec = shdr;

		if (shdr_type(shdr) == SHT_SYMTAB_SHNDX)
			symtab_shndx = (Elf32_Word *)((const char *)ehdr +
						      shdr_offset(shdr));

#ifdef MCOUNT_SORT_ENABLED
		/* locate the .init.data section in vmlinux */
		if (!strcmp(secstrings + idx, ".init.data"))
			mstruct.init_data_sec = shdr;
#endif

#ifdef UNWINDER_ORC_ENABLED
		/* locate the ORC unwind tables */
		if (!strcmp(secstrings + idx, ".orc_unwind_ip")) {
			orc_ip_size = shdr_size(shdr);
			g_orc_ip_table = (int *)((void *)ehdr +
						 shdr_offset(shdr));
		}
		if (!strcmp(secstrings + idx, ".orc_unwind")) {
			orc_size = shdr_size(shdr);
			g_orc_table = (struct orc_entry *)((void *)ehdr +
							   shdr_offset(shdr));
		}
#endif
	} /* for loop */

#ifdef UNWINDER_ORC_ENABLED
	if (!g_orc_ip_table || !g_orc_table) {
		fprintf(stderr,
			"incomplete ORC unwind tables in file: %s\n", fname);
		goto out;
	}

	/* The ip table and the orc table must describe the same entries */
	orc_num_entries = orc_ip_size / sizeof(int);
	if (orc_ip_size % sizeof(int) != 0 ||
	    orc_size % sizeof(struct orc_entry) != 0 ||
	    orc_num_entries != orc_size / sizeof(struct orc_entry)) {
		fprintf(stderr,
			"inconsistent ORC unwind table entries in file: %s\n",
			fname);
		goto out;
	}

	/* create thread to sort ORC unwind tables concurrently */
	if (pthread_create(&orc_sort_thread, NULL,
			   sort_orctable, &orc_ip_size)) {
		fprintf(stderr,
			"pthread_create orc_sort_thread failed '%s': %s\n",
			strerror(errno), fname);
		goto out;
	}
#endif
	if (!extab_sec) {
		fprintf(stderr, "no __ex_table in file: %s\n", fname);
		goto out;
	}

	if (!symtab_sec) {
		fprintf(stderr, "no .symtab in file: %s\n", fname);
		goto out;
	}

	if (!strtab_sec) {
		fprintf(stderr, "no .strtab in file: %s\n", fname);
		goto out;
	}

	extab_image = (void *)ehdr + shdr_offset(extab_sec);
	strtab = (const char *)ehdr + shdr_offset(strtab_sec);
	symtab = (const Elf_Sym *)((const char *)ehdr + shdr_offset(symtab_sec));

#ifdef MCOUNT_SORT_ENABLED
	mstruct.ehdr = ehdr;
	get_mcount_loc(&mstruct, symtab_sec, strtab);

	if (!mstruct.init_data_sec || !mstruct.start_mcount_loc || !mstruct.stop_mcount_loc) {
		fprintf(stderr,
			"incomplete mcount's sort in file: %s\n",
			fname);
		goto out;
	}

	/* create thread to sort mcount_loc concurrently */
	if (pthread_create(&mcount_sort_thread, NULL, &sort_mcount_loc, &mstruct)) {
		fprintf(stderr,
			"pthread_create mcount_sort_thread failed '%s': %s\n",
			strerror(errno), fname);
		goto out;
	}
#endif
	/* Sort the extable on this (main) thread */
	if (custom_sort) {
		custom_sort(extab_image, shdr_size(extab_sec));
	} else {
		int num_entries = shdr_size(extab_sec) / extable_ent_size;
		qsort(extab_image, num_entries,
		      extable_ent_size, compare_extable);
	}

	/* find the flag main_extable_sort_needed */
	sym_start = (void *)ehdr + shdr_offset(symtab_sec);
	sym_end = sym_start + shdr_size(symtab_sec);
	symentsize = shdr_entsize(symtab_sec);

	for (sym = sym_start; (void *)sym + symentsize < sym_end;
	     sym = (void *)sym + symentsize) {
		if (sym_type(sym) != STT_OBJECT)
			continue;
		if (!strcmp(strtab + sym_name(sym),
			    "main_extable_sort_needed")) {
			sort_needed_sym = sym;
			break;
		}
	}

	if (!sort_needed_sym) {
		fprintf(stderr,
			"no main_extable_sort_needed symbol in file: %s\n",
			fname);
		goto out;
	}

	/* Translate the symbol's vaddr into a file offset inside its section */
	sort_need_index = get_secindex(sym_shndx(sym),
				       ((void *)sort_needed_sym - (void *)symtab) / symentsize,
				       symtab_shndx);
	sort_needed_sec = get_index(shdr_start, shentsize, sort_need_index);
	sort_needed_loc = (void *)ehdr +
		shdr_offset(sort_needed_sec) +
		sym_value(sort_needed_sym) - shdr_addr(sort_needed_sec);

	/* extable has been sorted, clear the flag */
	w(0, sort_needed_loc);
	rc = 0;

out:
#ifdef UNWINDER_ORC_ENABLED
	if (orc_sort_thread) {
		void *retval = NULL;
		/* wait for ORC tables sort done */
		rc = pthread_join(orc_sort_thread, &retval);
		if (rc) {
			fprintf(stderr,
				"pthread_join failed '%s': %s\n",
				strerror(errno), fname);
		} else if (retval) {
			/* non-NULL retval is an error string from the thread */
			rc = -1;
			fprintf(stderr,
				"failed to sort ORC tables '%s': %s\n",
				(char *)retval, fname);
		}
	}
#endif

#ifdef MCOUNT_SORT_ENABLED
	if (mcount_sort_thread) {
		void *retval = NULL;
		/* wait for mcount sort done */
		rc = pthread_join(mcount_sort_thread, &retval);
		if (rc) {
			fprintf(stderr,
				"pthread_join failed '%s': %s\n",
				strerror(errno), fname);
		} else if (retval) {
			/* non-NULL retval is an error string from the thread */
			rc = -1;
			fprintf(stderr,
				"failed to sort mcount '%s': %s\n",
				(char *)retval, fname);
		}
	}
#endif
	return rc;
}
|
2012-04-19 21:59:55 +00:00
|
|
|
|
2012-09-05 11:26:11 +00:00
|
|
|
/* qsort() comparator for 32-bit relative-offset table entries */
static int compare_relative_table(const void *a, const void *b)
{
	int32_t lhs = (int32_t)r(a);
	int32_t rhs = (int32_t)r(b);

	if (lhs == rhs)
		return 0;
	return lhs < rhs ? -1 : 1;
}
|
|
|
|
|
2019-12-04 00:46:28 +00:00
|
|
|
static void sort_relative_table(char *extab_image, int image_size)
|
2016-02-17 18:20:12 +00:00
|
|
|
{
|
2019-12-04 00:46:28 +00:00
|
|
|
int i = 0;
|
2016-02-17 18:20:12 +00:00
|
|
|
|
2019-12-04 00:46:28 +00:00
|
|
|
/*
|
|
|
|
* Do the same thing the runtime sort does, first normalize to
|
|
|
|
* being relative to the start of the section.
|
|
|
|
*/
|
2016-02-17 18:20:12 +00:00
|
|
|
while (i < image_size) {
|
|
|
|
uint32_t *loc = (uint32_t *)(extab_image + i);
|
|
|
|
w(r(loc) + i, loc);
|
2019-12-04 00:46:28 +00:00
|
|
|
i += 4;
|
2016-02-17 18:20:12 +00:00
|
|
|
}
|
|
|
|
|
2019-12-04 00:46:28 +00:00
|
|
|
qsort(extab_image, image_size / 8, 8, compare_relative_table);
|
2016-02-17 18:20:12 +00:00
|
|
|
|
2019-12-04 00:46:28 +00:00
|
|
|
/* Now denormalize. */
|
2016-02-17 18:20:12 +00:00
|
|
|
i = 0;
|
|
|
|
while (i < image_size) {
|
|
|
|
uint32_t *loc = (uint32_t *)(extab_image + i);
|
|
|
|
w(r(loc) - i, loc);
|
2019-12-04 00:46:28 +00:00
|
|
|
i += 4;
|
2016-02-17 18:20:12 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-08 11:42:20 +00:00
|
|
|
static void sort_relative_table_with_data(char *extab_image, int image_size)
|
arm64: extable: add `type` and `data` fields
Subsequent patches will add specialized handlers for fixups, in addition
to the simple PC fixup and BPF handlers we have today. In preparation,
this patch adds a new `type` field to struct exception_table_entry, and
uses this to distinguish the fixup and BPF cases. A `data` field is also
added so that subsequent patches can associate data specific to each
exception site (e.g. register numbers).
Handlers are named ex_handler_*() for consistency, following the exmaple
of x86. At the same time, get_ex_fixup() is split out into a helper so
that it can be used by other ex_handler_*() functions ins subsequent
patches.
This patch will increase the size of the exception tables, which will be
remedied by subsequent patches removing redundant fixup code. There
should be no functional change as a result of this patch.
Since each entry is now 12 bytes in size, we must reduce the alignment
of each entry from `.align 3` (i.e. 8 bytes) to `.align 2` (i.e. 4
bytes), which is the natrual alignment of the `insn` and `fixup` fields.
The current 8-byte alignment is a holdover from when the `insn` and
`fixup` fields was 8 bytes, and while not harmful has not been necessary
since commit:
6c94f27ac847ff8e ("arm64: switch to relative exception tables")
Similarly, RO_EXCEPTION_TABLE_ALIGN is dropped to 4 bytes.
Concurrently with this patch, x86's exception table entry format is
being updated (similarly to a 12-byte format, with 32-bytes of absolute
data). Once both have been merged it should be possible to unify the
sorttable logic for the two.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: James Morse <james.morse@arm.com>
Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20211019160219.5202-11-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-10-19 16:02:16 +00:00
|
|
|
{
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
while (i < image_size) {
|
|
|
|
uint32_t *loc = (uint32_t *)(extab_image + i);
|
|
|
|
|
|
|
|
w(r(loc) + i, loc);
|
|
|
|
w(r(loc + 1) + i + 4, loc + 1);
|
|
|
|
/* Don't touch the fixup type or data */
|
|
|
|
|
|
|
|
i += sizeof(uint32_t) * 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
qsort(extab_image, image_size / 12, 12, compare_relative_table);
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
while (i < image_size) {
|
|
|
|
uint32_t *loc = (uint32_t *)(extab_image + i);
|
|
|
|
|
|
|
|
w(r(loc) - i, loc);
|
|
|
|
w(r(loc + 1) - (i + 4), loc + 1);
|
|
|
|
/* Don't touch the fixup type or data */
|
|
|
|
|
|
|
|
i += sizeof(uint32_t) * 3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-04 00:46:28 +00:00
|
|
|
static int do_file(char const *const fname, void *addr)
|
2012-04-19 21:59:55 +00:00
|
|
|
{
|
2025-01-05 16:22:17 +00:00
|
|
|
Elf_Ehdr *ehdr = addr;
|
2019-12-04 00:46:28 +00:00
|
|
|
table_sort_t custom_sort = NULL;
|
2012-04-19 21:59:55 +00:00
|
|
|
|
2025-01-05 16:22:17 +00:00
|
|
|
switch (ehdr->e32.e_ident[EI_DATA]) {
|
2012-04-19 21:59:55 +00:00
|
|
|
case ELFDATA2LSB:
|
2019-12-04 00:46:28 +00:00
|
|
|
r = rle;
|
|
|
|
r2 = r2le;
|
|
|
|
r8 = r8le;
|
|
|
|
w = wle;
|
2025-02-18 19:59:19 +00:00
|
|
|
w8 = w8le;
|
2012-04-19 21:59:55 +00:00
|
|
|
break;
|
|
|
|
case ELFDATA2MSB:
|
2019-12-04 00:46:28 +00:00
|
|
|
r = rbe;
|
|
|
|
r2 = r2be;
|
|
|
|
r8 = r8be;
|
|
|
|
w = wbe;
|
2025-02-18 19:59:19 +00:00
|
|
|
w8 = w8be;
|
2012-04-19 21:59:55 +00:00
|
|
|
break;
|
2019-12-04 00:46:28 +00:00
|
|
|
default:
|
|
|
|
fprintf(stderr, "unrecognized ELF data encoding %d: %s\n",
|
2025-01-05 16:22:17 +00:00
|
|
|
ehdr->e32.e_ident[EI_DATA], fname);
|
2019-12-04 00:46:28 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2025-01-05 16:22:17 +00:00
|
|
|
if (memcmp(ELFMAG, ehdr->e32.e_ident, SELFMAG) != 0 ||
|
|
|
|
(r2(&ehdr->e32.e_type) != ET_EXEC && r2(&ehdr->e32.e_type) != ET_DYN) ||
|
|
|
|
ehdr->e32.e_ident[EI_VERSION] != EV_CURRENT) {
|
2016-01-10 10:42:28 +00:00
|
|
|
fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname);
|
2019-12-04 00:46:27 +00:00
|
|
|
return -1;
|
2012-04-19 21:59:55 +00:00
|
|
|
}
|
|
|
|
|
2025-01-05 16:22:17 +00:00
|
|
|
switch (r2(&ehdr->e32.e_machine)) {
|
2021-11-08 11:42:20 +00:00
|
|
|
case EM_AARCH64:
|
2025-02-18 19:59:19 +00:00
|
|
|
#ifdef MCOUNT_SORT_ENABLED
|
|
|
|
sort_reloc = true;
|
|
|
|
rela_type = 0x403;
|
2025-02-25 18:20:08 +00:00
|
|
|
/* arm64 uses patchable function entry placing before function */
|
|
|
|
before_func = 8;
|
2025-02-18 19:59:19 +00:00
|
|
|
#endif
|
|
|
|
/* fallthrough */
|
|
|
|
case EM_386:
|
2022-12-10 14:39:59 +00:00
|
|
|
case EM_LOONGARCH:
|
2021-11-18 11:26:05 +00:00
|
|
|
case EM_RISCV:
|
2022-02-28 13:52:42 +00:00
|
|
|
case EM_S390:
|
2012-04-19 21:59:55 +00:00
|
|
|
case EM_X86_64:
|
2021-11-08 11:42:20 +00:00
|
|
|
custom_sort = sort_relative_table_with_data;
|
2016-02-17 18:20:12 +00:00
|
|
|
break;
|
2016-03-23 15:00:46 +00:00
|
|
|
case EM_PARISC:
|
2016-10-13 05:42:55 +00:00
|
|
|
case EM_PPC:
|
|
|
|
case EM_PPC64:
|
2012-09-05 11:26:11 +00:00
|
|
|
custom_sort = sort_relative_table;
|
|
|
|
break;
|
2013-11-15 06:38:05 +00:00
|
|
|
case EM_ARCOMPACT:
|
2013-11-22 07:35:58 +00:00
|
|
|
case EM_ARCV2:
|
2012-10-29 18:19:34 +00:00
|
|
|
case EM_ARM:
|
2014-01-23 23:52:46 +00:00
|
|
|
case EM_MICROBLAZE:
|
2012-04-24 18:23:14 +00:00
|
|
|
case EM_MIPS:
|
2014-02-18 11:29:11 +00:00
|
|
|
case EM_XTENSA:
|
2012-04-19 21:59:55 +00:00
|
|
|
break;
|
2019-12-04 00:46:28 +00:00
|
|
|
default:
|
|
|
|
fprintf(stderr, "unrecognized e_machine %d %s\n",
|
2025-01-05 16:22:17 +00:00
|
|
|
r2(&ehdr->e32.e_machine), fname);
|
2019-12-04 00:46:28 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2012-04-19 21:59:55 +00:00
|
|
|
|
2025-01-05 16:22:17 +00:00
|
|
|
switch (ehdr->e32.e_ident[EI_CLASS]) {
|
2025-01-10 12:54:59 +00:00
|
|
|
case ELFCLASS32: {
|
|
|
|
struct elf_funcs efuncs = {
|
|
|
|
.compare_extable = compare_extable_32,
|
|
|
|
.ehdr_shoff = ehdr32_shoff,
|
|
|
|
.ehdr_shentsize = ehdr32_shentsize,
|
|
|
|
.ehdr_shstrndx = ehdr32_shstrndx,
|
|
|
|
.ehdr_shnum = ehdr32_shnum,
|
|
|
|
.shdr_addr = shdr32_addr,
|
|
|
|
.shdr_offset = shdr32_offset,
|
|
|
|
.shdr_link = shdr32_link,
|
|
|
|
.shdr_size = shdr32_size,
|
|
|
|
.shdr_name = shdr32_name,
|
|
|
|
.shdr_type = shdr32_type,
|
|
|
|
.shdr_entsize = shdr32_entsize,
|
|
|
|
.sym_type = sym32_type,
|
|
|
|
.sym_name = sym32_name,
|
|
|
|
.sym_value = sym32_value,
|
|
|
|
.sym_shndx = sym32_shndx,
|
2025-02-18 19:59:19 +00:00
|
|
|
.rela_offset = rela32_offset,
|
|
|
|
.rela_info = rela32_info,
|
|
|
|
.rela_addend = rela32_addend,
|
|
|
|
.rela_write_addend = rela32_write_addend,
|
2025-01-10 12:54:59 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
e = efuncs;
|
|
|
|
long_size = 4;
|
|
|
|
extable_ent_size = 8;
|
|
|
|
|
2025-01-05 16:22:17 +00:00
|
|
|
if (r2(&ehdr->e32.e_ehsize) != sizeof(Elf32_Ehdr) ||
|
|
|
|
r2(&ehdr->e32.e_shentsize) != sizeof(Elf32_Shdr)) {
|
2012-04-19 21:59:55 +00:00
|
|
|
fprintf(stderr,
|
2016-01-10 10:42:28 +00:00
|
|
|
"unrecognized ET_EXEC/ET_DYN file: %s\n", fname);
|
2025-01-08 03:32:17 +00:00
|
|
|
return -1;
|
2012-04-19 21:59:55 +00:00
|
|
|
}
|
2025-01-08 03:32:17 +00:00
|
|
|
|
2025-01-10 12:54:59 +00:00
|
|
|
}
|
2012-04-19 21:59:55 +00:00
|
|
|
break;
|
2025-01-10 12:54:59 +00:00
|
|
|
case ELFCLASS64: {
|
|
|
|
struct elf_funcs efuncs = {
|
|
|
|
.compare_extable = compare_extable_64,
|
|
|
|
.ehdr_shoff = ehdr64_shoff,
|
|
|
|
.ehdr_shentsize = ehdr64_shentsize,
|
|
|
|
.ehdr_shstrndx = ehdr64_shstrndx,
|
|
|
|
.ehdr_shnum = ehdr64_shnum,
|
|
|
|
.shdr_addr = shdr64_addr,
|
|
|
|
.shdr_offset = shdr64_offset,
|
|
|
|
.shdr_link = shdr64_link,
|
|
|
|
.shdr_size = shdr64_size,
|
|
|
|
.shdr_name = shdr64_name,
|
|
|
|
.shdr_type = shdr64_type,
|
|
|
|
.shdr_entsize = shdr64_entsize,
|
|
|
|
.sym_type = sym64_type,
|
|
|
|
.sym_name = sym64_name,
|
|
|
|
.sym_value = sym64_value,
|
|
|
|
.sym_shndx = sym64_shndx,
|
2025-02-18 19:59:19 +00:00
|
|
|
.rela_offset = rela64_offset,
|
|
|
|
.rela_info = rela64_info,
|
|
|
|
.rela_addend = rela64_addend,
|
|
|
|
.rela_write_addend = rela64_write_addend,
|
2025-01-10 12:54:59 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
e = efuncs;
|
|
|
|
long_size = 8;
|
|
|
|
extable_ent_size = 16;
|
|
|
|
|
2025-01-05 16:22:17 +00:00
|
|
|
if (r2(&ehdr->e64.e_ehsize) != sizeof(Elf64_Ehdr) ||
|
|
|
|
r2(&ehdr->e64.e_shentsize) != sizeof(Elf64_Shdr)) {
|
2012-04-19 21:59:55 +00:00
|
|
|
fprintf(stderr,
|
2019-12-04 00:46:28 +00:00
|
|
|
"unrecognized ET_EXEC/ET_DYN file: %s\n",
|
|
|
|
fname);
|
2025-01-08 03:32:17 +00:00
|
|
|
return -1;
|
2019-12-04 00:46:28 +00:00
|
|
|
}
|
2025-01-08 03:32:17 +00:00
|
|
|
|
2025-01-10 12:54:59 +00:00
|
|
|
}
|
2019-12-04 00:46:28 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "unrecognized ELF class %d %s\n",
|
2025-01-05 16:22:17 +00:00
|
|
|
ehdr->e32.e_ident[EI_CLASS], fname);
|
2025-01-08 03:32:17 +00:00
|
|
|
return -1;
|
2012-04-19 21:59:55 +00:00
|
|
|
}
|
|
|
|
|
2025-01-08 03:32:17 +00:00
|
|
|
return do_sort(ehdr, fname, custom_sort);
|
2012-04-19 21:59:55 +00:00
|
|
|
}
|
|
|
|
|
2019-12-04 00:46:28 +00:00
|
|
|
/*
 * sorttable entry point.
 *
 * Usage: sorttable [-s nm-file] vmlinux...
 *
 *   -s nm-file:  "nm -S" output; symbols parsed from it are used to
 *                zero out mcount_loc entries that fall inside
 *                overridden weak functions.
 *
 * Each named file is mmap()ed and its tables are sorted in place.
 * Failure on one file does not stop processing of the rest.
 * Returns 0 when every file was processed successfully, 1 otherwise.
 */
int main(int argc, char *argv[])
{
	int i, n_error = 0;	/* gcc-4.3.0 false positive complaint */
	size_t size = 0;
	void *addr = NULL;
	int c;

	while ((c = getopt(argc, argv, "s:")) >= 0) {
		switch (c) {
		case 's':
			if (parse_symbols(optarg) < 0) {
				fprintf(stderr, "Could not parse %s\n", optarg);
				return -1;
			}
			break;
		default:
			fprintf(stderr, "usage: sorttable [-s nm-file] vmlinux...\n");
			return 0;
		}
	}

	if ((argc - optind) < 1) {
		/* keep this usage text consistent with the getopt error path */
		fprintf(stderr, "usage: sorttable [-s nm-file] vmlinux...\n");
		return 0;
	}

	/* Process each file in turn, allowing deep failure. */
	for (i = optind; i < argc; i++) {
		addr = mmap_file(argv[i], &size);
		if (!addr) {
			++n_error;
			continue;
		}

		if (do_file(argv[i], addr))
			++n_error;

		munmap(addr, size);
	}

	return !!n_error;
}
|