2019-05-19 13:51:43 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
2022-05-16 15:06:36 +00:00
|
|
|
#include <inttypes.h>
|
2021-06-24 09:41:01 +00:00
|
|
|
#include <sys/mman.h>
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2020-11-12 23:03:32 +00:00
|
|
|
#include <objtool/builtin.h>
|
|
|
|
#include <objtool/cfi.h>
|
|
|
|
#include <objtool/arch.h>
|
|
|
|
#include <objtool/check.h>
|
|
|
|
#include <objtool/special.h>
|
|
|
|
#include <objtool/warn.h>
|
|
|
|
#include <objtool/endianness.h>
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2023-03-01 15:13:07 +00:00
|
|
|
#include <linux/objtool_types.h>
|
2017-06-28 15:11:05 +00:00
|
|
|
#include <linux/hashtable.h>
|
|
|
|
#include <linux/kernel.h>
|
2020-08-18 13:57:45 +00:00
|
|
|
#include <linux/static_call_types.h>
|
2024-03-11 14:23:47 +00:00
|
|
|
#include <linux/string.h>
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
/*
 * One entry in an instruction's singly-linked list of alternatives
 * (instruction::alts).  Each node points at the first instruction of one
 * alternative replacement sequence for the original instruction.
 */
struct alternative {
	struct alternative *next;	/* next alternative for the same insn */
	struct instruction *insn;	/* first insn of this alternative */
};
|
|
|
|
|
2021-06-24 09:41:01 +00:00
|
|
|
/* CFI bookkeeping stats, reported with --stats: total states allocated,
 * states deduplicated against an existing one, and hash-cache hits. */
static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;

/* Arch-provided initial CFI state at function entry (set by the arch code). */
static struct cfi_init_state initial_func_cfi;

/* Canonical "initial" CFI state, used to shortcut comparisons. */
static struct cfi_state init_cfi;

/* Canonical function-entry CFI state derived from initial_func_cfi. */
static struct cfi_state func_cfi;

/* CFI state representing a forced-undefined unwind hint. */
static struct cfi_state force_undefined_cfi;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2017-07-11 15:33:42 +00:00
|
|
|
struct instruction *find_insn(struct objtool_file *file,
|
|
|
|
struct section *sec, unsigned long offset)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
|
2020-03-16 14:47:27 +00:00
|
|
|
hash_for_each_possible(file->insn_hash, insn, hash, sec_offset_hash(sec, offset)) {
|
2017-06-28 15:11:05 +00:00
|
|
|
if (insn->sec == sec && insn->offset == offset)
|
|
|
|
return insn;
|
2020-03-16 14:47:27 +00:00
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
struct instruction *next_insn_same_sec(struct objtool_file *file,
|
|
|
|
struct instruction *insn)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
if (insn->idx == INSN_CHUNK_MAX)
|
|
|
|
return find_insn(file, insn->sec, insn->offset + insn->len);
|
2017-06-28 15:11:05 +00:00
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
insn++;
|
|
|
|
if (!insn->len)
|
2017-06-28 15:11:05 +00:00
|
|
|
return NULL;
|
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
return insn;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2018-05-10 03:39:15 +00:00
|
|
|
static struct instruction *next_insn_same_func(struct objtool_file *file,
|
|
|
|
struct instruction *insn)
|
|
|
|
{
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
struct instruction *next = next_insn_same_sec(file, insn);
|
2022-09-22 20:03:50 +00:00
|
|
|
struct symbol *func = insn_func(insn);
|
2018-05-10 03:39:15 +00:00
|
|
|
|
|
|
|
if (!func)
|
|
|
|
return NULL;
|
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
if (next && insn_func(next) == func)
|
2018-05-10 03:39:15 +00:00
|
|
|
return next;
|
|
|
|
|
|
|
|
/* Check if we're already in the subfunction: */
|
|
|
|
if (func == func->cfunc)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* Move to the subfunction: */
|
|
|
|
return find_insn(file, func->cfunc->sec, func->cfunc->offset);
|
|
|
|
}
|
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
static struct instruction *prev_insn_same_sec(struct objtool_file *file,
|
|
|
|
struct instruction *insn)
|
|
|
|
{
|
|
|
|
if (insn->idx == 0) {
|
|
|
|
if (insn->prev_len)
|
|
|
|
return find_insn(file, insn->sec, insn->offset - insn->prev_len);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return insn - 1;
|
|
|
|
}
|
|
|
|
|
2020-04-28 21:45:16 +00:00
|
|
|
static struct instruction *prev_insn_same_sym(struct objtool_file *file,
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
struct instruction *insn)
|
2020-04-28 21:45:16 +00:00
|
|
|
{
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
struct instruction *prev = prev_insn_same_sec(file, insn);
|
2020-04-28 21:45:16 +00:00
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
if (prev && insn_func(prev) == insn_func(insn))
|
2020-04-28 21:45:16 +00:00
|
|
|
return prev;
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
#define for_each_insn(file, insn) \
|
|
|
|
for (struct section *__sec, *__fake = (struct section *)1; \
|
|
|
|
__fake; __fake = NULL) \
|
|
|
|
for_each_sec(file, __sec) \
|
|
|
|
sec_for_each_insn(file, __sec, insn)
|
|
|
|
|
2020-03-10 17:27:24 +00:00
|
|
|
#define func_for_each_insn(file, func, insn) \
|
2018-05-10 03:39:15 +00:00
|
|
|
for (insn = find_insn(file, func->sec, func->offset); \
|
|
|
|
insn; \
|
|
|
|
insn = next_insn_same_func(file, insn))
|
|
|
|
|
2020-03-10 17:24:59 +00:00
|
|
|
#define sym_for_each_insn(file, sym, insn) \
|
|
|
|
for (insn = find_insn(file, sym->sec, sym->offset); \
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
insn && insn->offset < sym->offset + sym->len; \
|
|
|
|
insn = next_insn_same_sec(file, insn))
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2020-03-10 17:24:59 +00:00
|
|
|
#define sym_for_each_insn_continue_reverse(file, sym, insn) \
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
for (insn = prev_insn_same_sec(file, insn); \
|
|
|
|
insn && insn->offset >= sym->offset; \
|
|
|
|
insn = prev_insn_same_sec(file, insn))
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
#define sec_for_each_insn_from(file, insn) \
|
|
|
|
for (; insn; insn = next_insn_same_sec(file, insn))
|
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
#define sec_for_each_insn_continue(file, insn) \
|
|
|
|
for (insn = next_insn_same_sec(file, insn); insn; \
|
|
|
|
insn = next_insn_same_sec(file, insn))
|
2017-06-28 15:11:05 +00:00
|
|
|
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
static inline struct symbol *insn_call_dest(struct instruction *insn)
|
|
|
|
{
|
|
|
|
if (insn->type == INSN_JUMP_DYNAMIC ||
|
|
|
|
insn->type == INSN_CALL_DYNAMIC)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return insn->_call_dest;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct reloc *insn_jump_table(struct instruction *insn)
|
|
|
|
{
|
|
|
|
if (insn->type == INSN_JUMP_DYNAMIC ||
|
|
|
|
insn->type == INSN_CALL_DYNAMIC)
|
|
|
|
return insn->_jump_table;
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2024-10-11 17:08:50 +00:00
|
|
|
static inline unsigned long insn_jump_table_size(struct instruction *insn)
|
|
|
|
{
|
|
|
|
if (insn->type == INSN_JUMP_DYNAMIC ||
|
|
|
|
insn->type == INSN_CALL_DYNAMIC)
|
|
|
|
return insn->_jump_table_size;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-02-24 16:29:14 +00:00
|
|
|
static bool is_jump_table_jump(struct instruction *insn)
|
|
|
|
{
|
|
|
|
struct alt_group *alt_group = insn->alt_group;
|
|
|
|
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
if (insn_jump_table(insn))
|
2021-02-24 16:29:14 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
/* Retpoline alternative for a jump table? */
|
|
|
|
return alt_group && alt_group->orig_group &&
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
insn_jump_table(alt_group->orig_group->first_insn);
|
2021-02-24 16:29:14 +00:00
|
|
|
}
|
|
|
|
|
2019-07-18 01:36:52 +00:00
|
|
|
static bool is_sibling_call(struct instruction *insn)
|
|
|
|
{
|
2021-01-21 21:29:22 +00:00
|
|
|
/*
|
2022-07-11 09:49:50 +00:00
|
|
|
* Assume only STT_FUNC calls have jump-tables.
|
2021-01-21 21:29:22 +00:00
|
|
|
*/
|
2022-07-11 09:49:50 +00:00
|
|
|
if (insn_func(insn)) {
|
|
|
|
/* An indirect jump is either a sibling call or a jump to a table. */
|
|
|
|
if (insn->type == INSN_JUMP_DYNAMIC)
|
|
|
|
return !is_jump_table_jump(insn);
|
|
|
|
}
|
2019-07-18 01:36:52 +00:00
|
|
|
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
/* add_jump_destinations() sets insn_call_dest(insn) for sibling calls. */
|
|
|
|
return (is_static_jump(insn) && insn_call_dest(insn));
|
2019-07-18 01:36:52 +00:00
|
|
|
}
|
|
|
|
|
objtool/rust: list `noreturn` Rust functions
Rust functions may be `noreturn` (i.e. diverging) by returning the
"never" type, `!`, e.g.
fn f() -> ! {
loop {}
}
Thus list the known `noreturn` functions to avoid such warnings.
Without this, `objtool` would complain if enabled for Rust, e.g.:
rust/core.o: warning: objtool:
_R...9panic_fmt() falls through to next function _R...18panic_nounwind_fmt()
rust/alloc.o: warning: objtool:
.text: unexpected end of section
In order to do so, we cannot match symbols' names exactly, for two
reasons:
- Rust mangling scheme [1] contains disambiguators [2] which we
cannot predict (e.g. they may vary depending on the compiler version).
One possibility to solve this would be to parse v0 and ignore/zero
those before comparison.
- Some of the diverging functions come from `core`, i.e. the Rust
standard library, which may change with each compiler version
since they are implementation details (e.g. `panic_internals`).
Thus, to workaround both issues, only part of the symbols are matched,
instead of using the `NORETURN` macro in `noreturns.h`.
Ideally, just like for the C side, we should have a better solution. For
instance, the compiler could give us the list via something like:
$ rustc --emit=noreturns ...
[ Kees agrees this should be automated and Peter says:
So it would be fairly simple to make objtool consume a magic section
emitted by the compiler.. I think we've asked the compiler folks
for that at some point even, but I don't have clear recollections.
We will ask upstream Rust about it. And if they agree, then perhaps
we can get Clang/GCC to implement something similar too -- for this
sort of thing we can take advantage of the shorter cycles of `rustc`
as well as their unstable features concept to experiment.
Gary proposed using DWARF (though it would need to be available), and
wrote a proof of concept script using the `object` and `gimli` crates:
https://gist.github.com/nbdd0121/449692570622c2f46a29ad9f47c3379a
- Miguel ]
Link: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html [1]
Link: https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#disambiguator [2]
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Tested-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20240725183325.122827-6-ojeda@kernel.org
[ Added `len_mismatch_fail` symbol for new `kernel` crate code merged
since then as well as 3 more `core::panicking` symbols that appear
in `RUST_DEBUG_ASSERTIONS=y` builds. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2024-07-25 18:33:22 +00:00
|
|
|
/*
|
|
|
|
* Checks if a string ends with another.
|
|
|
|
*/
|
|
|
|
static bool str_ends_with(const char *s, const char *sub)
{
	/*
	 * Use size_t: strlen() returns size_t, and storing it in an int
	 * truncates (and can go negative) for very long strings.
	 */
	const size_t slen = strlen(s);
	const size_t sublen = strlen(sub);

	/* A suffix longer than the string itself can never match. */
	if (sublen > slen)
		return false;

	/* Compare the trailing sublen bytes of s against sub. */
	return !memcmp(s + slen - sublen, sub, sublen);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Checks if a function is a Rust "noreturn" one.
|
|
|
|
*/
|
|
|
|
static bool is_rust_noreturn(const struct symbol *func)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If it does not start with "_R", then it is not a Rust symbol.
|
|
|
|
*/
|
|
|
|
if (strncmp(func->name, "_R", 2))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* These are just heuristics -- we do not control the precise symbol
|
|
|
|
* name, due to the crate disambiguators (which depend on the compiler)
|
|
|
|
* as well as changes to the source code itself between versions (since
|
|
|
|
* these come from the Rust standard library).
|
|
|
|
*/
|
|
|
|
return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") ||
|
|
|
|
str_ends_with(func->name, "_4core6option13unwrap_failed") ||
|
|
|
|
str_ends_with(func->name, "_4core6result13unwrap_failed") ||
|
|
|
|
str_ends_with(func->name, "_4core9panicking5panic") ||
|
|
|
|
str_ends_with(func->name, "_4core9panicking9panic_fmt") ||
|
|
|
|
str_ends_with(func->name, "_4core9panicking14panic_explicit") ||
|
|
|
|
str_ends_with(func->name, "_4core9panicking14panic_nounwind") ||
|
|
|
|
str_ends_with(func->name, "_4core9panicking18panic_bounds_check") ||
|
2025-07-12 16:01:02 +00:00
|
|
|
str_ends_with(func->name, "_4core9panicking18panic_nounwind_fmt") ||
|
objtool/rust: list `noreturn` Rust functions
Rust functions may be `noreturn` (i.e. diverging) by returning the
"never" type, `!`, e.g.
fn f() -> ! {
loop {}
}
Thus list the known `noreturn` functions to avoid such warnings.
Without this, `objtool` would complain if enabled for Rust, e.g.:
rust/core.o: warning: objtool:
_R...9panic_fmt() falls through to next function _R...18panic_nounwind_fmt()
rust/alloc.o: warning: objtool:
.text: unexpected end of section
In order to do so, we cannot match symbols' names exactly, for two
reasons:
- Rust mangling scheme [1] contains disambiguators [2] which we
cannot predict (e.g. they may vary depending on the compiler version).
One possibility to solve this would be to parse v0 and ignore/zero
those before comparison.
- Some of the diverging functions come from `core`, i.e. the Rust
standard library, which may change with each compiler version
since they are implementation details (e.g. `panic_internals`).
Thus, to workaround both issues, only part of the symbols are matched,
instead of using the `NORETURN` macro in `noreturns.h`.
Ideally, just like for the C side, we should have a better solution. For
instance, the compiler could give us the list via something like:
$ rustc --emit=noreturns ...
[ Kees agrees this should be automated and Peter says:
So it would be fairly simple to make objtool consume a magic section
emitted by the compiler.. I think we've asked the compiler folks
for that at some point even, but I don't have clear recollections.
We will ask upstream Rust about it. And if they agree, then perhaps
we can get Clang/GCC to implement something similar too -- for this
sort of thing we can take advantage of the shorter cycles of `rustc`
as well as their unstable features concept to experiment.
Gary proposed using DWARF (though it would need to be available), and
wrote a proof of concept script using the `object` and `gimli` crates:
https://gist.github.com/nbdd0121/449692570622c2f46a29ad9f47c3379a
- Miguel ]
Link: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html [1]
Link: https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#disambiguator [2]
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Tested-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20240725183325.122827-6-ojeda@kernel.org
[ Added `len_mismatch_fail` symbol for new `kernel` crate code merged
since then as well as 3 more `core::panicking` symbols that appear
in `RUST_DEBUG_ASSERTIONS=y` builds. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2024-07-25 18:33:22 +00:00
|
|
|
str_ends_with(func->name, "_4core9panicking19assert_failed_inner") ||
|
2025-04-13 00:23:38 +00:00
|
|
|
str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") ||
|
objtool/rust: list `noreturn` Rust functions
Rust functions may be `noreturn` (i.e. diverging) by returning the
"never" type, `!`, e.g.
fn f() -> ! {
loop {}
}
Thus list the known `noreturn` functions to avoid such warnings.
Without this, `objtool` would complain if enabled for Rust, e.g.:
rust/core.o: warning: objtool:
_R...9panic_fmt() falls through to next function _R...18panic_nounwind_fmt()
rust/alloc.o: warning: objtool:
.text: unexpected end of section
In order to do so, we cannot match symbols' names exactly, for two
reasons:
- Rust mangling scheme [1] contains disambiguators [2] which we
cannot predict (e.g. they may vary depending on the compiler version).
One possibility to solve this would be to parse v0 and ignore/zero
those before comparison.
- Some of the diverging functions come from `core`, i.e. the Rust
standard library, which may change with each compiler version
since they are implementation details (e.g. `panic_internals`).
Thus, to workaround both issues, only part of the symbols are matched,
instead of using the `NORETURN` macro in `noreturns.h`.
Ideally, just like for the C side, we should have a better solution. For
instance, the compiler could give us the list via something like:
$ rustc --emit=noreturns ...
[ Kees agrees this should be automated and Peter says:
So it would be fairly simple to make objtool consume a magic section
emitted by the compiler.. I think we've asked the compiler folks
for that at some point even, but I don't have clear recollections.
We will ask upstream Rust about it. And if they agree, then perhaps
we can get Clang/GCC to implement something similar too -- for this
sort of thing we can take advantage of the shorter cycles of `rustc`
as well as their unstable features concept to experiment.
Gary proposed using DWARF (though it would need to be available), and
wrote a proof of concept script using the `object` and `gimli` crates:
https://gist.github.com/nbdd0121/449692570622c2f46a29ad9f47c3379a
- Miguel ]
Link: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html [1]
Link: https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#disambiguator [2]
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Tested-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20240725183325.122827-6-ojeda@kernel.org
[ Added `len_mismatch_fail` symbol for new `kernel` crate code merged
since then as well as 3 more `core::panicking` symbols that appear
in `RUST_DEBUG_ASSERTIONS=y` builds. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2024-07-25 18:33:22 +00:00
|
|
|
str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") ||
|
2025-05-02 14:02:33 +00:00
|
|
|
str_ends_with(func->name, "_7___rustc17rust_begin_unwind") ||
|
2025-01-12 14:39:51 +00:00
|
|
|
strstr(func->name, "_4core9panicking13assert_failed") ||
|
objtool/rust: list `noreturn` Rust functions
Rust functions may be `noreturn` (i.e. diverging) by returning the
"never" type, `!`, e.g.
fn f() -> ! {
loop {}
}
Thus list the known `noreturn` functions to avoid such warnings.
Without this, `objtool` would complain if enabled for Rust, e.g.:
rust/core.o: warning: objtool:
_R...9panic_fmt() falls through to next function _R...18panic_nounwind_fmt()
rust/alloc.o: warning: objtool:
.text: unexpected end of section
In order to do so, we cannot match symbols' names exactly, for two
reasons:
- Rust mangling scheme [1] contains disambiguators [2] which we
cannot predict (e.g. they may vary depending on the compiler version).
One possibility to solve this would be to parse v0 and ignore/zero
those before comparison.
- Some of the diverging functions come from `core`, i.e. the Rust
standard library, which may change with each compiler version
since they are implementation details (e.g. `panic_internals`).
Thus, to workaround both issues, only part of the symbols are matched,
instead of using the `NORETURN` macro in `noreturns.h`.
Ideally, just like for the C side, we should have a better solution. For
instance, the compiler could give us the list via something like:
$ rustc --emit=noreturns ...
[ Kees agrees this should be automated and Peter says:
So it would be fairly simple to make objtool consume a magic section
emitted by the compiler.. I think we've asked the compiler folks
for that at some point even, but I don't have clear recollections.
We will ask upstream Rust about it. And if they agree, then perhaps
we can get Clang/GCC to implement something similar too -- for this
sort of thing we can take advantage of the shorter cycles of `rustc`
as well as their unstable features concept to experiment.
Gary proposed using DWARF (though it would need to be available), and
wrote a proof of concept script using the `object` and `gimli` crates:
https://gist.github.com/nbdd0121/449692570622c2f46a29ad9f47c3379a
- Miguel ]
Link: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html [1]
Link: https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#disambiguator [2]
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Tested-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20240725183325.122827-6-ojeda@kernel.org
[ Added `len_mismatch_fail` symbol for new `kernel` crate code merged
since then as well as 3 more `core::panicking` symbols that appear
in `RUST_DEBUG_ASSERTIONS=y` builds. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2024-07-25 18:33:22 +00:00
|
|
|
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
|
2025-05-20 18:55:55 +00:00
|
|
|
(strstr(func->name, "_4core5slice5index") &&
|
|
|
|
strstr(func->name, "slice_") &&
|
objtool/rust: list `noreturn` Rust functions
Rust functions may be `noreturn` (i.e. diverging) by returning the
"never" type, `!`, e.g.
fn f() -> ! {
loop {}
}
Thus list the known `noreturn` functions to avoid such warnings.
Without this, `objtool` would complain if enabled for Rust, e.g.:
rust/core.o: warning: objtool:
_R...9panic_fmt() falls through to next function _R...18panic_nounwind_fmt()
rust/alloc.o: warning: objtool:
.text: unexpected end of section
In order to do so, we cannot match symbols' names exactly, for two
reasons:
- Rust mangling scheme [1] contains disambiguators [2] which we
cannot predict (e.g. they may vary depending on the compiler version).
One possibility to solve this would be to parse v0 and ignore/zero
those before comparison.
- Some of the diverging functions come from `core`, i.e. the Rust
standard library, which may change with each compiler version
since they are implementation details (e.g. `panic_internals`).
Thus, to workaround both issues, only part of the symbols are matched,
instead of using the `NORETURN` macro in `noreturns.h`.
Ideally, just like for the C side, we should have a better solution. For
instance, the compiler could give us the list via something like:
$ rustc --emit=noreturns ...
[ Kees agrees this should be automated and Peter says:
So it would be fairly simple to make objtool consume a magic section
emitted by the compiler.. I think we've asked the compiler folks
for that at some point even, but I don't have clear recollections.
We will ask upstream Rust about it. And if they agree, then perhaps
we can get Clang/GCC to implement something similar too -- for this
sort of thing we can take advantage of the shorter cycles of `rustc`
as well as their unstable features concept to experiment.
Gary proposed using DWARF (though it would need to be available), and
wrote a proof of concept script using the `object` and `gimli` crates:
https://gist.github.com/nbdd0121/449692570622c2f46a29ad9f47c3379a
- Miguel ]
Link: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html [1]
Link: https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#disambiguator [2]
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Tested-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20240725183325.122827-6-ojeda@kernel.org
[ Added `len_mismatch_fail` symbol for new `kernel` crate code merged
since then as well as 3 more `core::panicking` symbols that appear
in `RUST_DEBUG_ASSERTIONS=y` builds. - Miguel ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2024-07-25 18:33:22 +00:00
|
|
|
str_ends_with(func->name, "_fail"));
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
|
|
|
|
* This checks to see if the given function is a "noreturn" function.
|
|
|
|
*
|
|
|
|
* For global functions which are outside the scope of this object file, we
|
|
|
|
* have to keep a manual list of them.
|
|
|
|
*
|
|
|
|
* For local functions, we have to detect them manually by simply looking for
|
|
|
|
* the lack of a return instruction.
|
|
|
|
*/
|
2019-07-18 01:36:50 +00:00
|
|
|
static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
				int recursion)
{
	int i;
	struct instruction *insn;
	bool empty = true;

	/* Table of known global noreturn functions, generated from noreturns.h. */
#define NORETURN(func) __stringify(func),
	static const char * const global_noreturns[] = {
#include "noreturns.h"
	};
#undef NORETURN

	if (!func)
		return false;

	/* Global/weak symbols: check the manually maintained noreturn lists. */
	if (func->bind == STB_GLOBAL || func->bind == STB_WEAK) {
		if (is_rust_noreturn(func))
			return true;

		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
			if (!strcmp(func->name, global_noreturns[i]))
				return true;
	}

	/*
	 * NOTE(review): a weak symbol not on the lists is not inspected
	 * further -- presumably because it may be overridden at link time by
	 * a returning implementation; confirm against the 2023-04-12 commit.
	 */
	if (func->bind == STB_WEAK)
		return false;

	/* Zero-length functions have no body to analyze. */
	if (!func->len)
		return false;

	insn = find_insn(file, func->sec, func->offset);
	if (!insn || !insn_func(insn))
		return false;

	/* Any explicit return instruction means the function can return. */
	func_for_each_insn(file, func, insn) {
		empty = false;

		if (insn->type == INSN_RETURN)
			return false;
	}

	/* No instructions found at all: not provably a dead end. */
	if (empty)
		return false;

	/*
	 * A function can have a sibling call instead of a return. In that
	 * case, the function's dead-end status depends on whether the target
	 * of the sibling call returns.
	 */
	func_for_each_insn(file, func, insn) {
		if (is_sibling_call(insn)) {
			struct instruction *dest = insn->jump_dest;

			if (!dest)
				/* sibling call to another file */
				return false;

			/* local sibling call */
			if (recursion == 5) {
				/*
				 * Infinite recursion: two functions have
				 * sibling calls to each other. This is a very
				 * rare case. It means they aren't dead ends.
				 */
				return false;
			}

			/* Recurse into the sibling-call target, bounded above. */
			return __dead_end_function(file, insn_func(dest), recursion+1);
		}
	}

	/* No return and no escaping sibling call: the function never returns. */
	return true;
}
|
|
|
|
|
2019-07-18 01:36:50 +00:00
|
|
|
static bool dead_end_function(struct objtool_file *file, struct symbol *func)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
|
|
|
return __dead_end_function(file, func, 0);
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
static void init_cfi_state(struct cfi_state *cfi)
|
2017-06-28 15:11:07 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2017-08-29 17:51:03 +00:00
|
|
|
for (i = 0; i < CFI_NUM_REGS; i++) {
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->regs[i].base = CFI_UNDEFINED;
|
|
|
|
cfi->vals[i].base = CFI_UNDEFINED;
|
2017-08-29 17:51:03 +00:00
|
|
|
}
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->cfa.base = CFI_UNDEFINED;
|
|
|
|
cfi->drap_reg = CFI_UNDEFINED;
|
|
|
|
cfi->drap_offset = -1;
|
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:43 +00:00
|
|
|
static void init_insn_state(struct objtool_file *file, struct insn_state *state,
|
|
|
|
struct section *sec)
|
2020-03-25 13:04:45 +00:00
|
|
|
{
|
|
|
|
memset(state, 0, sizeof(*state));
|
|
|
|
init_cfi_state(&state->cfi);
|
2020-03-23 17:26:03 +00:00
|
|
|
|
2025-03-24 21:56:03 +00:00
|
|
|
if (opts.noinstr && sec)
|
2020-03-23 17:26:03 +00:00
|
|
|
state->noinstr = sec->noinstr;
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:01 +00:00
|
|
|
static struct cfi_state *cfi_alloc(void)
|
|
|
|
{
|
2023-11-07 20:55:00 +00:00
|
|
|
struct cfi_state *cfi = calloc(1, sizeof(struct cfi_state));
|
2021-06-24 09:41:01 +00:00
|
|
|
if (!cfi) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("calloc");
|
2021-06-24 09:41:01 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
nr_cfi++;
|
|
|
|
return cfi;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cfi_bits;
|
|
|
|
static struct hlist_head *cfi_hash;
|
|
|
|
|
|
|
|
static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2)
|
|
|
|
{
|
|
|
|
return memcmp((void *)cfi1 + sizeof(cfi1->hash),
|
|
|
|
(void *)cfi2 + sizeof(cfi2->hash),
|
|
|
|
sizeof(struct cfi_state) - sizeof(struct hlist_node));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline u32 cfi_key(struct cfi_state *cfi)
|
|
|
|
{
|
|
|
|
return jhash((void *)cfi + sizeof(cfi->hash),
|
|
|
|
sizeof(*cfi) - sizeof(cfi->hash), 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi)
|
|
|
|
{
|
|
|
|
struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
|
|
|
|
struct cfi_state *obj;
|
|
|
|
|
|
|
|
hlist_for_each_entry(obj, head, hash) {
|
|
|
|
if (!cficmp(cfi, obj)) {
|
|
|
|
nr_cfi_cache++;
|
|
|
|
return obj;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
obj = cfi_alloc();
|
|
|
|
*obj = *cfi;
|
|
|
|
hlist_add_head(&obj->hash, head);
|
|
|
|
|
|
|
|
return obj;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void cfi_hash_add(struct cfi_state *cfi)
|
|
|
|
{
|
|
|
|
struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
|
|
|
|
|
|
|
|
hlist_add_head(&cfi->hash, head);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *cfi_hash_alloc(unsigned long size)
|
|
|
|
{
|
|
|
|
cfi_bits = max(10, ilog2(size));
|
|
|
|
cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits,
|
|
|
|
PROT_READ|PROT_WRITE,
|
|
|
|
MAP_PRIVATE|MAP_ANON, -1, 0);
|
|
|
|
if (cfi_hash == (void *)-1L) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("mmap fail cfi_hash");
|
2021-06-24 09:41:01 +00:00
|
|
|
cfi_hash = NULL;
|
2022-04-18 16:50:26 +00:00
|
|
|
} else if (opts.stats) {
|
2021-06-24 09:41:01 +00:00
|
|
|
printf("cfi_bits: %d\n", cfi_bits);
|
|
|
|
}
|
|
|
|
|
|
|
|
return cfi_hash;
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned long nr_insns;
|
|
|
|
static unsigned long nr_insns_visited;
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
|
|
|
|
* Call the arch-specific instruction decoder for all the instructions and add
|
|
|
|
* them to the global instruction list.
|
|
|
|
*/
|
|
|
|
static int decode_instructions(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct section *sec;
|
|
|
|
struct symbol *func;
|
|
|
|
unsigned long offset;
|
|
|
|
struct instruction *insn;
|
|
|
|
int ret;
|
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
for_each_sec(file, sec) {
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
struct instruction *insns = NULL;
|
|
|
|
u8 prev_len = 0;
|
|
|
|
u8 idx = 0;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
if (!(sec->sh.sh_flags & SHF_EXECINSTR))
|
|
|
|
continue;
|
|
|
|
|
2017-07-11 15:33:42 +00:00
|
|
|
if (strcmp(sec->name, ".altinstr_replacement") &&
|
|
|
|
strcmp(sec->name, ".altinstr_aux") &&
|
|
|
|
strncmp(sec->name, ".discard.", 9))
|
|
|
|
sec->text = true;
|
|
|
|
|
2020-03-25 16:18:17 +00:00
|
|
|
if (!strcmp(sec->name, ".noinstr.text") ||
|
2022-06-14 21:15:47 +00:00
|
|
|
!strcmp(sec->name, ".entry.text") ||
|
2023-01-12 19:43:31 +00:00
|
|
|
!strcmp(sec->name, ".cpuidle.text") ||
|
2023-07-11 09:19:51 +00:00
|
|
|
!strncmp(sec->name, ".text..__x86.", 13))
|
2020-03-10 17:57:41 +00:00
|
|
|
sec->noinstr = true;
|
|
|
|
|
2022-09-15 11:11:08 +00:00
|
|
|
/*
|
|
|
|
* .init.text code is ran before userspace and thus doesn't
|
|
|
|
* strictly need retpolines, except for modules which are
|
|
|
|
* loaded late, they very much do need retpoline in their
|
|
|
|
* .init.text
|
|
|
|
*/
|
|
|
|
if (!strcmp(sec->name, ".init.text") && !opts.module)
|
|
|
|
sec->init = true;
|
|
|
|
|
2021-08-22 22:50:37 +00:00
|
|
|
for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
if (!insns || idx == INSN_CHUNK_MAX) {
|
|
|
|
insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
|
|
|
|
if (!insns) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("calloc");
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
idx = 0;
|
|
|
|
} else {
|
|
|
|
idx++;
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
insn = &insns[idx];
|
|
|
|
insn->idx = idx;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
INIT_LIST_HEAD(&insn->call_node);
|
2017-06-28 15:11:05 +00:00
|
|
|
insn->sec = sec;
|
|
|
|
insn->offset = offset;
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
insn->prev_len = prev_len;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2021-06-24 09:41:23 +00:00
|
|
|
ret = arch_decode_instruction(file, sec, offset,
|
2021-08-22 22:50:37 +00:00
|
|
|
sec->sh.sh_size - offset,
|
2023-02-08 17:17:57 +00:00
|
|
|
insn);
|
2017-06-28 15:11:05 +00:00
|
|
|
if (ret)
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
return ret;
|
|
|
|
|
|
|
|
prev_len = insn->len;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2022-03-08 15:30:49 +00:00
|
|
|
/*
|
|
|
|
* By default, "ud2" is a dead end unless otherwise
|
|
|
|
* annotated, because GCC 7 inserts it for certain
|
|
|
|
* divide-by-zero cases.
|
|
|
|
*/
|
|
|
|
if (insn->type == INSN_BUG)
|
|
|
|
insn->dead_end = true;
|
|
|
|
|
2020-03-16 14:47:27 +00:00
|
|
|
hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
|
2020-03-12 08:26:29 +00:00
|
|
|
nr_insns++;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
sec_for_each_sym(sec, func) {
|
2022-09-22 20:03:50 +00:00
|
|
|
if (func->type != STT_NOTYPE && func->type != STT_FUNC)
|
|
|
|
continue;
|
|
|
|
|
2022-12-20 10:13:23 +00:00
|
|
|
if (func->offset == sec->sh.sh_size) {
|
|
|
|
/* Heuristic: likely an "end" symbol */
|
|
|
|
if (func->type == STT_NOTYPE)
|
|
|
|
continue;
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR("%s(): STT_FUNC at end of section", func->name);
|
2022-12-20 10:13:23 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2023-08-14 11:44:29 +00:00
|
|
|
if (func->embedded_insn || func->alias != func)
|
2017-06-28 15:11:05 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!find_insn(file, sec, func->offset)) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR("%s(): can't find starting instruction", func->name);
|
2017-06-28 15:11:05 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-03-08 15:30:54 +00:00
|
|
|
sym_for_each_insn(file, func, insn) {
|
2022-09-22 20:03:50 +00:00
|
|
|
insn->sym = func;
|
|
|
|
if (func->type == STT_FUNC &&
|
|
|
|
insn->type == INSN_ENDBR &&
|
|
|
|
list_empty(&insn->call_node)) {
|
|
|
|
if (insn->offset == func->offset) {
|
2022-03-08 15:30:55 +00:00
|
|
|
list_add_tail(&insn->call_node, &file->endbr_list);
|
2022-03-08 15:30:54 +00:00
|
|
|
file->nr_endbr++;
|
|
|
|
} else {
|
|
|
|
file->nr_endbr_int++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:26 +00:00
|
|
|
if (opts.stats)
|
2020-03-12 08:26:29 +00:00
|
|
|
printf("nr_insns: %lu\n", nr_insns);
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:23 +00:00
|
|
|
/*
 * Read the pv_ops[] .data table to find the static initialized values.
 */
static int add_pv_ops(struct objtool_file *file, const char *symname)
{
	struct symbol *sym, *func;
	unsigned long off, end;
	struct reloc *reloc;
	int idx;

	/* Table may be absent (e.g. !PARAVIRT builds); that's not an error. */
	sym = find_symbol_by_name(file->elf, symname);
	if (!sym)
		return 0;

	off = sym->offset;
	end = off + sym->len;
	for (;;) {
		/* Walk every relocation that lands inside the table. */
		reloc = find_reloc_by_dest_range(file->elf, sym->sec, off, end - off);
		if (!reloc)
			break;

		/* Slot index: the table is an array of pointers. */
		idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long);

		func = reloc->sym;
		if (func->type == STT_SECTION)
			func = find_symbol_by_offset(reloc->sym->sec,
						     reloc_addend(reloc));
		if (!func) {
			ERROR_FUNC(reloc->sym->sec, reloc_addend(reloc),
				   "can't find func at %s[%d]", symname, idx);
			return -1;
		}

		/* Record func as a possible target for pv_ops[idx] calls. */
		if (objtool_pv_add(file, idx, func))
			return -1;

		/*
		 * Advance just past this reloc's offset so the next
		 * find_reloc_by_dest_range() finds the following entry.
		 */
		off = reloc_offset(reloc) + 1;
		if (off > end)
			break;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate and initialize file->pv_ops[].
|
|
|
|
*/
|
|
|
|
static int init_pv_ops(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
static const char *pv_ops_tables[] = {
|
|
|
|
"pv_ops",
|
|
|
|
"xen_cpu_ops",
|
|
|
|
"xen_irq_ops",
|
|
|
|
"xen_mmu_ops",
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
const char *pv_ops;
|
|
|
|
struct symbol *sym;
|
2025-03-24 21:55:59 +00:00
|
|
|
int idx, nr, ret;
|
2021-06-24 09:41:23 +00:00
|
|
|
|
2022-04-18 16:50:26 +00:00
|
|
|
if (!opts.noinstr)
|
2021-06-24 09:41:23 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
file->pv_ops = NULL;
|
|
|
|
|
|
|
|
sym = find_symbol_by_name(file->elf, "pv_ops");
|
|
|
|
if (!sym)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nr = sym->len / sizeof(unsigned long);
|
|
|
|
file->pv_ops = calloc(sizeof(struct pv_state), nr);
|
2025-03-24 21:55:59 +00:00
|
|
|
if (!file->pv_ops) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("calloc");
|
2021-06-24 09:41:23 +00:00
|
|
|
return -1;
|
2025-03-24 21:55:59 +00:00
|
|
|
}
|
2021-06-24 09:41:23 +00:00
|
|
|
|
|
|
|
for (idx = 0; idx < nr; idx++)
|
|
|
|
INIT_LIST_HEAD(&file->pv_ops[idx].targets);
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) {
|
|
|
|
ret = add_pv_ops(file, pv_ops);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
2021-06-24 09:41:23 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-08-18 13:57:45 +00:00
|
|
|
static int create_static_call_sections(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct static_call_site *site;
|
2023-05-30 17:20:59 +00:00
|
|
|
struct section *sec;
|
2020-08-18 13:57:45 +00:00
|
|
|
struct instruction *insn;
|
|
|
|
struct symbol *key_sym;
|
|
|
|
char *key_name, *tmp;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
sec = find_section_by_name(file->elf, ".static_call_sites");
|
|
|
|
if (sec) {
|
|
|
|
INIT_LIST_HEAD(&file->static_call_list);
|
|
|
|
WARN("file already has .static_call_sites section, skipping");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (list_empty(&file->static_call_list))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
idx = 0;
|
2021-03-26 15:12:12 +00:00
|
|
|
list_for_each_entry(insn, &file->static_call_list, call_node)
|
2020-08-18 13:57:45 +00:00
|
|
|
idx++;
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
sec = elf_create_section_pair(file->elf, ".static_call_sites",
|
|
|
|
sizeof(*site), idx, idx * 2);
|
2020-08-18 13:57:45 +00:00
|
|
|
if (!sec)
|
|
|
|
return -1;
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
/* Allow modules to modify the low bits of static_call_site::key */
|
2023-05-30 17:20:54 +00:00
|
|
|
sec->sh.sh_flags |= SHF_WRITE;
|
|
|
|
|
2020-08-18 13:57:45 +00:00
|
|
|
idx = 0;
|
2021-03-26 15:12:12 +00:00
|
|
|
list_for_each_entry(insn, &file->static_call_list, call_node) {
|
2020-08-18 13:57:45 +00:00
|
|
|
|
|
|
|
/* populate reloc for 'addr' */
|
2023-05-30 17:20:59 +00:00
|
|
|
if (!elf_init_reloc_text_sym(file->elf, sec,
|
|
|
|
idx * sizeof(*site), idx * 2,
|
|
|
|
insn->sec, insn->offset))
|
2020-12-14 22:04:20 +00:00
|
|
|
return -1;
|
2020-08-18 13:57:45 +00:00
|
|
|
|
|
|
|
/* find key symbol */
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
key_name = strdup(insn_call_dest(insn)->name);
|
2020-08-18 13:57:45 +00:00
|
|
|
if (!key_name) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("strdup");
|
2020-08-18 13:57:45 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
|
|
|
|
STATIC_CALL_TRAMP_PREFIX_LEN)) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR("static_call: trampoline name malformed: %s", key_name);
|
2020-08-18 13:57:45 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
|
|
|
|
memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
|
|
|
|
|
|
|
|
key_sym = find_symbol_by_name(file->elf, tmp);
|
|
|
|
if (!key_sym) {
|
2022-04-18 16:50:26 +00:00
|
|
|
if (!opts.module) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR("static_call: can't find static_call_key symbol: %s", tmp);
|
2021-01-27 23:18:37 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For modules(), the key might not be exported, which
|
|
|
|
* means the module can make static calls but isn't
|
|
|
|
* allowed to change them.
|
|
|
|
*
|
|
|
|
* In that case we temporarily set the key to be the
|
|
|
|
* trampoline address. This is fixed up in
|
|
|
|
* static_call_add_module().
|
|
|
|
*/
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
key_sym = insn_call_dest(insn);
|
2020-08-18 13:57:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* populate reloc for 'key' */
|
2023-05-30 17:20:59 +00:00
|
|
|
if (!elf_init_reloc_data_sym(file->elf, sec,
|
|
|
|
idx * sizeof(*site) + 4,
|
|
|
|
(idx * 2) + 1, key_sym,
|
|
|
|
is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
|
2020-08-18 13:57:45 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-10-26 12:01:36 +00:00
|
|
|
static int create_retpoline_sites_sections(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
struct section *sec;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
sec = find_section_by_name(file->elf, ".retpoline_sites");
|
|
|
|
if (sec) {
|
|
|
|
WARN("file already has .retpoline_sites, skipping");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
idx = 0;
|
|
|
|
list_for_each_entry(insn, &file->retpoline_call_list, call_node)
|
|
|
|
idx++;
|
|
|
|
|
|
|
|
if (!idx)
|
|
|
|
return 0;
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
sec = elf_create_section_pair(file->elf, ".retpoline_sites",
|
|
|
|
sizeof(int), idx, idx);
|
|
|
|
if (!sec)
|
2021-10-26 12:01:36 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx = 0;
|
|
|
|
list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
if (!elf_init_reloc_text_sym(file->elf, sec,
|
|
|
|
idx * sizeof(int), idx,
|
|
|
|
insn->sec, insn->offset))
|
2021-10-26 12:01:36 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-06-14 21:15:38 +00:00
|
|
|
static int create_return_sites_sections(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
struct section *sec;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
sec = find_section_by_name(file->elf, ".return_sites");
|
|
|
|
if (sec) {
|
|
|
|
WARN("file already has .return_sites, skipping");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
idx = 0;
|
|
|
|
list_for_each_entry(insn, &file->return_thunk_list, call_node)
|
|
|
|
idx++;
|
|
|
|
|
|
|
|
if (!idx)
|
|
|
|
return 0;
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
sec = elf_create_section_pair(file->elf, ".return_sites",
|
|
|
|
sizeof(int), idx, idx);
|
|
|
|
if (!sec)
|
2022-06-14 21:15:38 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx = 0;
|
|
|
|
list_for_each_entry(insn, &file->return_thunk_list, call_node) {
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
if (!elf_init_reloc_text_sym(file->elf, sec,
|
|
|
|
idx * sizeof(int), idx,
|
|
|
|
insn->sec, insn->offset))
|
2022-06-14 21:15:38 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-03-08 15:30:55 +00:00
|
|
|
/*
 * Emit the .ibt_endbr_seal section: one entry per superfluous ENDBR
 * instruction (collected on file->endbr_list) so the kernel can seal
 * (overwrite) them at boot.
 */
static int create_ibt_endbr_seal_sections(struct objtool_file *file)
{
	struct instruction *insn;
	struct section *sec;
	int idx;

	sec = find_section_by_name(file->elf, ".ibt_endbr_seal");
	if (sec) {
		WARN("file already has .ibt_endbr_seal, skipping");
		return 0;
	}

	/* First pass: count entries to size the section. */
	idx = 0;
	list_for_each_entry(insn, &file->endbr_list, call_node)
		idx++;

	if (opts.stats) {
		printf("ibt: ENDBR at function start: %d\n", file->nr_endbr);
		printf("ibt: ENDBR inside functions: %d\n", file->nr_endbr_int);
		printf("ibt: superfluous ENDBR: %d\n", idx);
	}

	if (!idx)
		return 0;

	sec = elf_create_section_pair(file->elf, ".ibt_endbr_seal",
				      sizeof(int), idx, idx);
	if (!sec)
		return -1;

	idx = 0;
	list_for_each_entry(insn, &file->endbr_list, call_node) {

		/* Zero the slot; the reloc below supplies the address. */
		int *site = (int *)sec->data->d_buf + idx;
		struct symbol *sym = insn->sym;
		*site = 0;

		/*
		 * A sealed ENDBR at the start of init_module()/cleanup_module()
		 * would break the legacy module entry points; reject them.
		 */
		if (opts.module && sym && sym->type == STT_FUNC &&
		    insn->offset == sym->offset &&
		    (!strcmp(sym->name, "init_module") ||
		     !strcmp(sym->name, "cleanup_module"))) {
			ERROR("%s(): Magic init_module() function name is deprecated, use module_init(fn) instead",
			      sym->name);
			return -1;
		}

		if (!elf_init_reloc_text_sym(file->elf, sec,
					     idx * sizeof(int), idx,
					     insn->sec, insn->offset))
			return -1;

		idx++;
	}

	return 0;
}
|
|
|
|
|
2022-10-27 09:28:13 +00:00
|
|
|
static int create_cfi_sections(struct objtool_file *file)
|
|
|
|
{
|
2023-04-12 19:03:19 +00:00
|
|
|
struct section *sec;
|
2022-10-27 09:28:13 +00:00
|
|
|
struct symbol *sym;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
sec = find_section_by_name(file->elf, ".cfi_sites");
|
|
|
|
if (sec) {
|
|
|
|
INIT_LIST_HEAD(&file->call_list);
|
|
|
|
WARN("file already has .cfi_sites section, skipping");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
idx = 0;
|
2023-04-12 19:03:19 +00:00
|
|
|
for_each_sym(file, sym) {
|
|
|
|
if (sym->type != STT_FUNC)
|
2022-10-27 09:28:13 +00:00
|
|
|
continue;
|
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (strncmp(sym->name, "__cfi_", 6))
|
|
|
|
continue;
|
2022-10-27 09:28:13 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
idx++;
|
2022-10-27 09:28:13 +00:00
|
|
|
}
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
sec = elf_create_section_pair(file->elf, ".cfi_sites",
|
|
|
|
sizeof(unsigned int), idx, idx);
|
2022-10-27 09:28:13 +00:00
|
|
|
if (!sec)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx = 0;
|
2023-04-12 19:03:19 +00:00
|
|
|
for_each_sym(file, sym) {
|
|
|
|
if (sym->type != STT_FUNC)
|
2022-10-27 09:28:13 +00:00
|
|
|
continue;
|
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (strncmp(sym->name, "__cfi_", 6))
|
|
|
|
continue;
|
2022-10-27 09:28:13 +00:00
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
if (!elf_init_reloc_text_sym(file->elf, sec,
|
|
|
|
idx * sizeof(unsigned int), idx,
|
|
|
|
sym->sec, sym->offset))
|
2023-04-12 19:03:19 +00:00
|
|
|
return -1;
|
2022-10-27 09:28:13 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
idx++;
|
2022-10-27 09:28:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-08-06 22:14:09 +00:00
|
|
|
/*
 * Emit the __mcount_loc section: one pointer-sized absolute-address entry
 * per mcount/fentry call site (collected on file->mcount_loc_list), used
 * by ftrace to locate patchable call sites.
 */
static int create_mcount_loc_sections(struct objtool_file *file)
{
	/* Entry size follows the ELF class: 4 bytes (32-bit) or 8 (64-bit). */
	size_t addr_size = elf_addr_size(file->elf);
	struct instruction *insn;
	struct section *sec;
	int idx;

	sec = find_section_by_name(file->elf, "__mcount_loc");
	if (sec) {
		INIT_LIST_HEAD(&file->mcount_loc_list);
		WARN("file already has __mcount_loc section, skipping");
		return 0;
	}

	if (list_empty(&file->mcount_loc_list))
		return 0;

	/* First pass: count entries to size the section. */
	idx = 0;
	list_for_each_entry(insn, &file->mcount_loc_list, call_node)
		idx++;

	sec = elf_create_section_pair(file->elf, "__mcount_loc", addr_size,
				      idx, idx);
	if (!sec)
		return -1;

	sec->sh.sh_addralign = addr_size;

	idx = 0;
	list_for_each_entry(insn, &file->mcount_loc_list, call_node) {

		struct reloc *reloc;

		reloc = elf_init_reloc_text_sym(file->elf, sec, idx * addr_size, idx,
					       insn->sec, insn->offset);
		if (!reloc)
			return -1;

		/* Absolute address relocation, sized to match the entry. */
		set_reloc_type(file->elf, reloc, addr_size == 8 ? R_ABS64 : R_ABS32);

		idx++;
	}

	return 0;
}
|
|
|
|
|
2022-09-15 11:11:09 +00:00
|
|
|
static int create_direct_call_sections(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
struct section *sec;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
sec = find_section_by_name(file->elf, ".call_sites");
|
|
|
|
if (sec) {
|
|
|
|
INIT_LIST_HEAD(&file->call_list);
|
|
|
|
WARN("file already has .call_sites section, skipping");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (list_empty(&file->call_list))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
idx = 0;
|
|
|
|
list_for_each_entry(insn, &file->call_list, call_node)
|
|
|
|
idx++;
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
sec = elf_create_section_pair(file->elf, ".call_sites",
|
|
|
|
sizeof(unsigned int), idx, idx);
|
2022-09-15 11:11:09 +00:00
|
|
|
if (!sec)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx = 0;
|
|
|
|
list_for_each_entry(insn, &file->call_list, call_node) {
|
|
|
|
|
2023-05-30 17:20:59 +00:00
|
|
|
if (!elf_init_reloc_text_sym(file->elf, sec,
|
|
|
|
idx * sizeof(unsigned int), idx,
|
|
|
|
insn->sec, insn->offset))
|
2022-09-15 11:11:09 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
 * Warnings shouldn't be reported for ignored functions.
 *
 * Reads the .rela.discard.func_stack_frame_non_standard relocations
 * (emitted by STACK_FRAME_NON_STANDARD annotations) and marks each
 * referenced function as ignored.
 */
static int add_ignores(struct objtool_file *file)
{
	struct section *rsec;
	struct symbol *func;
	struct reloc *reloc;

	rsec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard");
	if (!rsec)
		return 0;

	for_each_reloc(rsec, reloc) {
		switch (reloc->sym->type) {
		case STT_FUNC:
			/* Reloc targets the function symbol directly. */
			func = reloc->sym;
			break;

		case STT_SECTION:
			/* Section-relative reloc: resolve via the addend. */
			func = find_func_by_offset(reloc->sym->sec, reloc_addend(reloc));
			if (!func)
				continue;
			break;

		default:
			ERROR("unexpected relocation symbol type in %s: %d",
			      rsec->name, reloc->sym->type);
			return -1;
		}

		func->ignore = true;
		/* Also ignore the .cold. counterpart, if any. */
		if (func->cfunc)
			func->cfunc->ignore = true;
	}

	return 0;
}
|
|
|
|
|
2019-02-25 11:50:09 +00:00
|
|
|
/*
|
|
|
|
* This is a whitelist of functions that is allowed to be called with AC set.
|
|
|
|
* The list is meant to be minimal and only contains compiler instrumentation
|
|
|
|
* ABI and a few functions used to implement *_{to,from}_user() functions.
|
|
|
|
*
|
|
|
|
* These functions must not directly change AC, but may PUSHF/POPF.
|
|
|
|
*/
|
|
|
|
static const char *uaccess_safe_builtin[] = {
|
|
|
|
/* KASAN */
|
|
|
|
"kasan_report",
|
2021-02-24 20:05:05 +00:00
|
|
|
"kasan_check_range",
|
2019-02-25 11:50:09 +00:00
|
|
|
/* KASAN out-of-line */
|
|
|
|
"__asan_loadN_noabort",
|
|
|
|
"__asan_load1_noabort",
|
|
|
|
"__asan_load2_noabort",
|
|
|
|
"__asan_load4_noabort",
|
|
|
|
"__asan_load8_noabort",
|
|
|
|
"__asan_load16_noabort",
|
|
|
|
"__asan_storeN_noabort",
|
|
|
|
"__asan_store1_noabort",
|
|
|
|
"__asan_store2_noabort",
|
|
|
|
"__asan_store4_noabort",
|
|
|
|
"__asan_store8_noabort",
|
|
|
|
"__asan_store16_noabort",
|
objtool: Permit __kasan_check_{read,write} under UACCESS
Building linux-next with JUMP_LABEL=n and KASAN=y, I got this objtool
warning:
arch/x86/lib/copy_mc.o: warning: objtool: copy_mc_to_user()+0x22: call to
__kasan_check_read() with UACCESS enabled
What happens here is that copy_mc_to_user() branches on a static key in a
UACCESS region:
__uaccess_begin();
if (static_branch_unlikely(©_mc_fragile_key))
ret = copy_mc_fragile(to, from, len);
ret = copy_mc_generic(to, from, len);
__uaccess_end();
and the !CONFIG_JUMP_LABEL version of static_branch_unlikely() uses
static_key_enabled(), which uses static_key_count(), which uses
atomic_read(), which calls instrument_atomic_read(), which uses
kasan_check_read(), which is __kasan_check_read().
Let's permit these KASAN helpers in UACCESS regions - static keys should
probably work under UACCESS, I think.
PeterZ adds:
It's not a matter of permitting, it's a matter of being safe and
correct. In this case it is, because it's a thin wrapper around
check_memory_region() which was already marked safe.
check_memory_region() is correct because the only thing it ends up
calling is kasa_report() and that is also marked safe because that is
annotated with user_access_save/restore() before it does anything else.
On top of that, all of KASAN is noinstr, so nothing in here will end up
in tracing and/or call schedule() before the user_access_save().
Signed-off-by: Jann Horn <jannh@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-09-28 22:49:16 +00:00
|
|
|
"__kasan_check_read",
|
|
|
|
"__kasan_check_write",
|
2019-02-25 11:50:09 +00:00
|
|
|
/* KASAN in-line */
|
|
|
|
"__asan_report_load_n_noabort",
|
|
|
|
"__asan_report_load1_noabort",
|
|
|
|
"__asan_report_load2_noabort",
|
|
|
|
"__asan_report_load4_noabort",
|
|
|
|
"__asan_report_load8_noabort",
|
|
|
|
"__asan_report_load16_noabort",
|
|
|
|
"__asan_report_store_n_noabort",
|
|
|
|
"__asan_report_store1_noabort",
|
|
|
|
"__asan_report_store2_noabort",
|
|
|
|
"__asan_report_store4_noabort",
|
|
|
|
"__asan_report_store8_noabort",
|
|
|
|
"__asan_report_store16_noabort",
|
2019-11-14 18:02:57 +00:00
|
|
|
/* KCSAN */
|
2020-03-25 16:41:57 +00:00
|
|
|
"__kcsan_check_access",
|
2021-11-30 11:44:30 +00:00
|
|
|
"__kcsan_mb",
|
|
|
|
"__kcsan_wmb",
|
|
|
|
"__kcsan_rmb",
|
|
|
|
"__kcsan_release",
|
2019-11-14 18:02:57 +00:00
|
|
|
"kcsan_found_watchpoint",
|
|
|
|
"kcsan_setup_watchpoint",
|
2020-03-25 16:41:57 +00:00
|
|
|
"kcsan_check_scoped_accesses",
|
2020-04-24 15:47:30 +00:00
|
|
|
"kcsan_disable_current",
|
|
|
|
"kcsan_enable_current_nowarn",
|
2019-11-14 18:02:57 +00:00
|
|
|
/* KCSAN/TSAN */
|
|
|
|
"__tsan_func_entry",
|
|
|
|
"__tsan_func_exit",
|
|
|
|
"__tsan_read_range",
|
|
|
|
"__tsan_write_range",
|
|
|
|
"__tsan_read1",
|
|
|
|
"__tsan_read2",
|
|
|
|
"__tsan_read4",
|
|
|
|
"__tsan_read8",
|
|
|
|
"__tsan_read16",
|
|
|
|
"__tsan_write1",
|
|
|
|
"__tsan_write2",
|
|
|
|
"__tsan_write4",
|
|
|
|
"__tsan_write8",
|
|
|
|
"__tsan_write16",
|
2020-07-24 07:00:02 +00:00
|
|
|
"__tsan_read_write1",
|
|
|
|
"__tsan_read_write2",
|
|
|
|
"__tsan_read_write4",
|
|
|
|
"__tsan_read_write8",
|
|
|
|
"__tsan_read_write16",
|
2022-09-12 09:45:41 +00:00
|
|
|
"__tsan_volatile_read1",
|
|
|
|
"__tsan_volatile_read2",
|
|
|
|
"__tsan_volatile_read4",
|
|
|
|
"__tsan_volatile_read8",
|
|
|
|
"__tsan_volatile_read16",
|
|
|
|
"__tsan_volatile_write1",
|
|
|
|
"__tsan_volatile_write2",
|
|
|
|
"__tsan_volatile_write4",
|
|
|
|
"__tsan_volatile_write8",
|
|
|
|
"__tsan_volatile_write16",
|
2020-07-03 13:40:30 +00:00
|
|
|
"__tsan_atomic8_load",
|
|
|
|
"__tsan_atomic16_load",
|
|
|
|
"__tsan_atomic32_load",
|
|
|
|
"__tsan_atomic64_load",
|
|
|
|
"__tsan_atomic8_store",
|
|
|
|
"__tsan_atomic16_store",
|
|
|
|
"__tsan_atomic32_store",
|
|
|
|
"__tsan_atomic64_store",
|
|
|
|
"__tsan_atomic8_exchange",
|
|
|
|
"__tsan_atomic16_exchange",
|
|
|
|
"__tsan_atomic32_exchange",
|
|
|
|
"__tsan_atomic64_exchange",
|
|
|
|
"__tsan_atomic8_fetch_add",
|
|
|
|
"__tsan_atomic16_fetch_add",
|
|
|
|
"__tsan_atomic32_fetch_add",
|
|
|
|
"__tsan_atomic64_fetch_add",
|
|
|
|
"__tsan_atomic8_fetch_sub",
|
|
|
|
"__tsan_atomic16_fetch_sub",
|
|
|
|
"__tsan_atomic32_fetch_sub",
|
|
|
|
"__tsan_atomic64_fetch_sub",
|
|
|
|
"__tsan_atomic8_fetch_and",
|
|
|
|
"__tsan_atomic16_fetch_and",
|
|
|
|
"__tsan_atomic32_fetch_and",
|
|
|
|
"__tsan_atomic64_fetch_and",
|
|
|
|
"__tsan_atomic8_fetch_or",
|
|
|
|
"__tsan_atomic16_fetch_or",
|
|
|
|
"__tsan_atomic32_fetch_or",
|
|
|
|
"__tsan_atomic64_fetch_or",
|
|
|
|
"__tsan_atomic8_fetch_xor",
|
|
|
|
"__tsan_atomic16_fetch_xor",
|
|
|
|
"__tsan_atomic32_fetch_xor",
|
|
|
|
"__tsan_atomic64_fetch_xor",
|
|
|
|
"__tsan_atomic8_fetch_nand",
|
|
|
|
"__tsan_atomic16_fetch_nand",
|
|
|
|
"__tsan_atomic32_fetch_nand",
|
|
|
|
"__tsan_atomic64_fetch_nand",
|
|
|
|
"__tsan_atomic8_compare_exchange_strong",
|
|
|
|
"__tsan_atomic16_compare_exchange_strong",
|
|
|
|
"__tsan_atomic32_compare_exchange_strong",
|
|
|
|
"__tsan_atomic64_compare_exchange_strong",
|
|
|
|
"__tsan_atomic8_compare_exchange_weak",
|
|
|
|
"__tsan_atomic16_compare_exchange_weak",
|
|
|
|
"__tsan_atomic32_compare_exchange_weak",
|
|
|
|
"__tsan_atomic64_compare_exchange_weak",
|
|
|
|
"__tsan_atomic8_compare_exchange_val",
|
|
|
|
"__tsan_atomic16_compare_exchange_val",
|
|
|
|
"__tsan_atomic32_compare_exchange_val",
|
|
|
|
"__tsan_atomic64_compare_exchange_val",
|
|
|
|
"__tsan_atomic_thread_fence",
|
|
|
|
"__tsan_atomic_signal_fence",
|
2023-02-15 13:00:58 +00:00
|
|
|
"__tsan_unaligned_read16",
|
|
|
|
"__tsan_unaligned_write16",
|
2019-02-25 11:50:09 +00:00
|
|
|
/* KCOV */
|
|
|
|
"write_comp_data",
|
2020-04-29 19:09:04 +00:00
|
|
|
"check_kcov_mode",
|
2019-02-25 11:50:09 +00:00
|
|
|
"__sanitizer_cov_trace_pc",
|
|
|
|
"__sanitizer_cov_trace_const_cmp1",
|
|
|
|
"__sanitizer_cov_trace_const_cmp2",
|
|
|
|
"__sanitizer_cov_trace_const_cmp4",
|
|
|
|
"__sanitizer_cov_trace_const_cmp8",
|
|
|
|
"__sanitizer_cov_trace_cmp1",
|
|
|
|
"__sanitizer_cov_trace_cmp2",
|
|
|
|
"__sanitizer_cov_trace_cmp4",
|
|
|
|
"__sanitizer_cov_trace_cmp8",
|
2020-02-16 18:07:49 +00:00
|
|
|
"__sanitizer_cov_trace_switch",
|
2022-09-15 15:04:05 +00:00
|
|
|
/* KMSAN */
|
|
|
|
"kmsan_copy_to_user",
|
2024-06-21 11:34:55 +00:00
|
|
|
"kmsan_disable_current",
|
|
|
|
"kmsan_enable_current",
|
2022-09-15 15:04:05 +00:00
|
|
|
"kmsan_report",
|
|
|
|
"kmsan_unpoison_entry_regs",
|
|
|
|
"kmsan_unpoison_memory",
|
|
|
|
"__msan_chain_origin",
|
|
|
|
"__msan_get_context_state",
|
|
|
|
"__msan_instrument_asm_store",
|
|
|
|
"__msan_metadata_ptr_for_load_1",
|
|
|
|
"__msan_metadata_ptr_for_load_2",
|
|
|
|
"__msan_metadata_ptr_for_load_4",
|
|
|
|
"__msan_metadata_ptr_for_load_8",
|
|
|
|
"__msan_metadata_ptr_for_load_n",
|
|
|
|
"__msan_metadata_ptr_for_store_1",
|
|
|
|
"__msan_metadata_ptr_for_store_2",
|
|
|
|
"__msan_metadata_ptr_for_store_4",
|
|
|
|
"__msan_metadata_ptr_for_store_8",
|
|
|
|
"__msan_metadata_ptr_for_store_n",
|
|
|
|
"__msan_poison_alloca",
|
|
|
|
"__msan_warning",
|
2019-02-25 11:50:09 +00:00
|
|
|
/* UBSAN */
|
|
|
|
"ubsan_type_mismatch_common",
|
|
|
|
"__ubsan_handle_type_mismatch",
|
|
|
|
"__ubsan_handle_type_mismatch_v1",
|
2019-10-21 13:11:49 +00:00
|
|
|
"__ubsan_handle_shift_out_of_bounds",
|
2023-01-12 19:43:56 +00:00
|
|
|
"__ubsan_handle_load_invalid_value",
|
2025-07-17 23:25:06 +00:00
|
|
|
/* KSTACK_ERASE */
|
2025-07-17 23:25:07 +00:00
|
|
|
"__sanitizer_cov_stack_depth",
|
2025-03-24 21:56:07 +00:00
|
|
|
/* TRACE_BRANCH_PROFILING */
|
|
|
|
"ftrace_likely_update",
|
|
|
|
/* STACKPROTECTOR */
|
|
|
|
"__stack_chk_fail",
|
2019-02-25 11:50:09 +00:00
|
|
|
/* misc */
|
|
|
|
"csum_partial_copy_generic",
|
x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.
Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:
On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > However now I see that copy_user_generic() works for the wrong reason.
> > It works because the exception on the source address due to poison
> > looks no different than a write fault on the user address to the
> > caller, it's still just a short copy. So it makes copy_to_user() work
> > for the wrong reason relative to the name.
>
> Right.
>
> And it won't work that way on other architectures. On x86, we have a
> generic function that can take faults on either side, and we use it
> for both cases (and for the "in_user" case too), but that's an
> artifact of the architecture oddity.
>
> In fact, it's probably wrong even on x86 - because it can hide bugs -
> but writing those things is painful enough that everybody prefers
> having just one function.
Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().
Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.
One side-effect of this reorganization is that separating copy_mc_64.S
to its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.
[ bp: Massage a bit. ]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
2020-10-06 03:40:16 +00:00
|
|
|
"copy_mc_fragile",
|
|
|
|
"copy_mc_fragile_handle_tail",
|
2020-10-06 03:40:25 +00:00
|
|
|
"copy_mc_enhanced_fast_string",
|
2023-04-16 21:06:58 +00:00
|
|
|
"rep_stos_alternative",
|
x86: improve on the non-rep 'copy_user' function
The old 'copy_user_generic_unrolled' function was oddly implemented for
largely historical reasons: it had been largely based on the uncached
copy case, which has some other concerns.
For example, the __copy_user_nocache() function uses 'movnti' for the
destination stores, and those want the destination to be aligned. In
contrast, the regular copy function doesn't really care, and trying to
align things only complicates matters.
Also, like the clear_user function, the copy function had some odd
handling of the repeat counts, complicating the exception handling for
no really good reason. So as with clear_user, just write it to keep all
the byte counts in the %rcx register, exactly like the 'rep movs'
functionality that this replaces.
Unlike a real 'rep movs', we do allow for this to trash a few temporary
registers to not have to unnecessarily save/restore registers on the
stack.
And like the clearing case, rename this to what it now clearly is:
'rep_movs_alternative', and make it one coherent function, so that it
shows up as such in profiles (instead of the odd split between
"copy_user_generic_unrolled" and "copy_user_short_string", the latter of
which was not about strings at all, and which was shared with the
uncached case).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-04-17 01:23:06 +00:00
|
|
|
"rep_movs_alternative",
|
2023-04-15 20:39:15 +00:00
|
|
|
"__copy_user_nocache",
|
2019-02-25 11:50:09 +00:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static void add_uaccess_safe(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct symbol *func;
|
|
|
|
const char **name;
|
|
|
|
|
2022-04-18 16:50:26 +00:00
|
|
|
if (!opts.uaccess)
|
2019-02-25 11:50:09 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
for (name = uaccess_safe_builtin; *name; name++) {
|
|
|
|
func = find_symbol_by_name(file->elf, *name);
|
|
|
|
if (!func)
|
|
|
|
continue;
|
|
|
|
|
2019-07-18 01:36:48 +00:00
|
|
|
func->uaccess_safe = true;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-14 11:44:29 +00:00
|
|
|
/*
|
|
|
|
* Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
|
|
|
|
* will be added to the .retpoline_sites section.
|
|
|
|
*/
|
2021-03-26 15:12:04 +00:00
|
|
|
__weak bool arch_is_retpoline(struct symbol *sym)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-08-14 11:44:29 +00:00
|
|
|
/*
|
|
|
|
* Symbols that replace INSN_RETURN, every (tail) call to such a symbol
|
|
|
|
* will be added to the .return_sites section.
|
|
|
|
*/
|
2022-06-14 21:15:38 +00:00
|
|
|
__weak bool arch_is_rethunk(struct symbol *sym)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-08-14 11:44:29 +00:00
|
|
|
/*
|
|
|
|
* Symbols that are embedded inside other instructions, because sometimes crazy
|
|
|
|
* code exists. These are mostly ignored for validation purposes.
|
|
|
|
*/
|
|
|
|
__weak bool arch_is_embedded_insn(struct symbol *sym)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-03-26 15:12:13 +00:00
|
|
|
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
|
|
|
|
{
|
objtool: Remove instruction::reloc
Instead of caching the reloc for each instruction, only keep a
negative cache of not having a reloc (by far the most common case).
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
+ u16 no_reloc:1; /* 78: 5 2 */
- /* XXX 3 bits hole, try to pack */
+ /* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
struct symbol * call_dest; /* 88 8 */
struct instruction * jump_dest; /* 96 8 */
struct instruction * first_jump_src; /* 104 8 */
struct reloc * jump_table; /* 112 8 */
- struct reloc * reloc; /* 120 8 */
+ struct alternative * alts; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct alternative * alts; /* 128 8 */
- struct symbol * sym; /* 136 8 */
- struct stack_op * stack_ops; /* 144 8 */
- struct cfi_state * cfi; /* 152 8 */
+ struct symbol * sym; /* 128 8 */
+ struct stack_op * stack_ops; /* 136 8 */
+ struct cfi_state * cfi; /* 144 8 */
- /* size: 160, cachelines: 3, members: 29 */
- /* sum members: 158 */
- /* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
- /* last cacheline: 32 bytes */
+ /* size: 152, cachelines: 3, members: 29 */
+ /* sum members: 150 */
+ /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
+ /* last cacheline: 24 bytes */
};
pre: 5:48.89 real, 220.96 user, 127.55 sys, 24834672 mem
post: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.572145269@infradead.org
2023-02-08 17:18:01 +00:00
|
|
|
struct reloc *reloc;
|
|
|
|
|
|
|
|
if (insn->no_reloc)
|
2021-03-26 15:12:13 +00:00
|
|
|
return NULL;
|
|
|
|
|
objtool: Remove instruction::reloc
Instead of caching the reloc for each instruction, only keep a
negative cache of not having a reloc (by far the most common case).
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
+ u16 no_reloc:1; /* 78: 5 2 */
- /* XXX 3 bits hole, try to pack */
+ /* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
struct symbol * call_dest; /* 88 8 */
struct instruction * jump_dest; /* 96 8 */
struct instruction * first_jump_src; /* 104 8 */
struct reloc * jump_table; /* 112 8 */
- struct reloc * reloc; /* 120 8 */
+ struct alternative * alts; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct alternative * alts; /* 128 8 */
- struct symbol * sym; /* 136 8 */
- struct stack_op * stack_ops; /* 144 8 */
- struct cfi_state * cfi; /* 152 8 */
+ struct symbol * sym; /* 128 8 */
+ struct stack_op * stack_ops; /* 136 8 */
+ struct cfi_state * cfi; /* 144 8 */
- /* size: 160, cachelines: 3, members: 29 */
- /* sum members: 158 */
- /* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
- /* last cacheline: 32 bytes */
+ /* size: 152, cachelines: 3, members: 29 */
+ /* sum members: 150 */
+ /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
+ /* last cacheline: 24 bytes */
};
pre: 5:48.89 real, 220.96 user, 127.55 sys, 24834672 mem
post: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.572145269@infradead.org
2023-02-08 17:18:01 +00:00
|
|
|
if (!file)
|
|
|
|
return NULL;
|
2021-06-24 09:41:23 +00:00
|
|
|
|
objtool: Remove instruction::reloc
Instead of caching the reloc for each instruction, only keep a
negative cache of not having a reloc (by far the most common case).
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
+ u16 no_reloc:1; /* 78: 5 2 */
- /* XXX 3 bits hole, try to pack */
+ /* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
struct symbol * call_dest; /* 88 8 */
struct instruction * jump_dest; /* 96 8 */
struct instruction * first_jump_src; /* 104 8 */
struct reloc * jump_table; /* 112 8 */
- struct reloc * reloc; /* 120 8 */
+ struct alternative * alts; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct alternative * alts; /* 128 8 */
- struct symbol * sym; /* 136 8 */
- struct stack_op * stack_ops; /* 144 8 */
- struct cfi_state * cfi; /* 152 8 */
+ struct symbol * sym; /* 128 8 */
+ struct stack_op * stack_ops; /* 136 8 */
+ struct cfi_state * cfi; /* 144 8 */
- /* size: 160, cachelines: 3, members: 29 */
- /* sum members: 158 */
- /* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
- /* last cacheline: 32 bytes */
+ /* size: 152, cachelines: 3, members: 29 */
+ /* sum members: 150 */
+ /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
+ /* last cacheline: 24 bytes */
};
pre: 5:48.89 real, 220.96 user, 127.55 sys, 24834672 mem
post: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.572145269@infradead.org
2023-02-08 17:18:01 +00:00
|
|
|
reloc = find_reloc_by_dest_range(file->elf, insn->sec,
|
|
|
|
insn->offset, insn->len);
|
|
|
|
if (!reloc) {
|
|
|
|
insn->no_reloc = 1;
|
|
|
|
return NULL;
|
2021-03-26 15:12:13 +00:00
|
|
|
}
|
|
|
|
|
objtool: Remove instruction::reloc
Instead of caching the reloc for each instruction, only keep a
negative cache of not having a reloc (by far the most common case).
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
+ u16 no_reloc:1; /* 78: 5 2 */
- /* XXX 3 bits hole, try to pack */
+ /* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
struct symbol * call_dest; /* 88 8 */
struct instruction * jump_dest; /* 96 8 */
struct instruction * first_jump_src; /* 104 8 */
struct reloc * jump_table; /* 112 8 */
- struct reloc * reloc; /* 120 8 */
+ struct alternative * alts; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct alternative * alts; /* 128 8 */
- struct symbol * sym; /* 136 8 */
- struct stack_op * stack_ops; /* 144 8 */
- struct cfi_state * cfi; /* 152 8 */
+ struct symbol * sym; /* 128 8 */
+ struct stack_op * stack_ops; /* 136 8 */
+ struct cfi_state * cfi; /* 144 8 */
- /* size: 160, cachelines: 3, members: 29 */
- /* sum members: 158 */
- /* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
- /* last cacheline: 32 bytes */
+ /* size: 152, cachelines: 3, members: 29 */
+ /* sum members: 150 */
+ /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
+ /* last cacheline: 24 bytes */
};
pre: 5:48.89 real, 220.96 user, 127.55 sys, 24834672 mem
post: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.572145269@infradead.org
2023-02-08 17:18:01 +00:00
|
|
|
return reloc;
|
2021-03-26 15:12:13 +00:00
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:02 +00:00
|
|
|
static void remove_insn_ops(struct instruction *insn)
|
|
|
|
{
|
objtool: Make instruction::stack_ops a single-linked list
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
unsigned int len; /* 64 4 */
enum insn_type type; /* 68 4 */
long unsigned int immediate; /* 72 8 */
u16 dead_end:1; /* 80: 0 2 */
u16 ignore:1; /* 80: 1 2 */
u16 ignore_alts:1; /* 80: 2 2 */
u16 hint:1; /* 80: 3 2 */
u16 save:1; /* 80: 4 2 */
u16 restore:1; /* 80: 5 2 */
u16 retpoline_safe:1; /* 80: 6 2 */
u16 noendbr:1; /* 80: 7 2 */
u16 entry:1; /* 80: 8 2 */
/* XXX 7 bits hole, try to pack */
s8 instr; /* 82 1 */
u8 visited; /* 83 1 */
/* XXX 4 bytes hole, try to pack */
struct alt_group * alt_group; /* 88 8 */
struct symbol * call_dest; /* 96 8 */
struct instruction * jump_dest; /* 104 8 */
struct instruction * first_jump_src; /* 112 8 */
struct reloc * jump_table; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
struct reloc * reloc; /* 128 8 */
struct list_head alts; /* 136 16 */
struct symbol * sym; /* 152 8 */
- struct list_head stack_ops; /* 160 16 */
- struct cfi_state * cfi; /* 176 8 */
+ struct stack_op * stack_ops; /* 160 8 */
+ struct cfi_state * cfi; /* 168 8 */
- /* size: 184, cachelines: 3, members: 29 */
- /* sum members: 178, holes: 1, sum holes: 4 */
+ /* size: 176, cachelines: 3, members: 29 */
+ /* sum members: 170, holes: 1, sum holes: 4 */
/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
- /* last cacheline: 56 bytes */
+ /* last cacheline: 48 bytes */
};
pre: 5:58.22 real, 226.69 user, 131.22 sys, 26221520 mem
post: 5:58.50 real, 229.64 user, 128.65 sys, 26221520 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.362196959@infradead.org
2023-02-08 17:17:58 +00:00
|
|
|
struct stack_op *op, *next;
|
2021-06-24 09:41:02 +00:00
|
|
|
|
objtool: Make instruction::stack_ops a single-linked list
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
unsigned int len; /* 64 4 */
enum insn_type type; /* 68 4 */
long unsigned int immediate; /* 72 8 */
u16 dead_end:1; /* 80: 0 2 */
u16 ignore:1; /* 80: 1 2 */
u16 ignore_alts:1; /* 80: 2 2 */
u16 hint:1; /* 80: 3 2 */
u16 save:1; /* 80: 4 2 */
u16 restore:1; /* 80: 5 2 */
u16 retpoline_safe:1; /* 80: 6 2 */
u16 noendbr:1; /* 80: 7 2 */
u16 entry:1; /* 80: 8 2 */
/* XXX 7 bits hole, try to pack */
s8 instr; /* 82 1 */
u8 visited; /* 83 1 */
/* XXX 4 bytes hole, try to pack */
struct alt_group * alt_group; /* 88 8 */
struct symbol * call_dest; /* 96 8 */
struct instruction * jump_dest; /* 104 8 */
struct instruction * first_jump_src; /* 112 8 */
struct reloc * jump_table; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
struct reloc * reloc; /* 128 8 */
struct list_head alts; /* 136 16 */
struct symbol * sym; /* 152 8 */
- struct list_head stack_ops; /* 160 16 */
- struct cfi_state * cfi; /* 176 8 */
+ struct stack_op * stack_ops; /* 160 8 */
+ struct cfi_state * cfi; /* 168 8 */
- /* size: 184, cachelines: 3, members: 29 */
- /* sum members: 178, holes: 1, sum holes: 4 */
+ /* size: 176, cachelines: 3, members: 29 */
+ /* sum members: 170, holes: 1, sum holes: 4 */
/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
- /* last cacheline: 56 bytes */
+ /* last cacheline: 48 bytes */
};
pre: 5:58.22 real, 226.69 user, 131.22 sys, 26221520 mem
post: 5:58.50 real, 229.64 user, 128.65 sys, 26221520 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.362196959@infradead.org
2023-02-08 17:17:58 +00:00
|
|
|
for (op = insn->stack_ops; op; op = next) {
|
|
|
|
next = op->next;
|
2021-06-24 09:41:02 +00:00
|
|
|
free(op);
|
|
|
|
}
|
objtool: Make instruction::stack_ops a single-linked list
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
unsigned int len; /* 64 4 */
enum insn_type type; /* 68 4 */
long unsigned int immediate; /* 72 8 */
u16 dead_end:1; /* 80: 0 2 */
u16 ignore:1; /* 80: 1 2 */
u16 ignore_alts:1; /* 80: 2 2 */
u16 hint:1; /* 80: 3 2 */
u16 save:1; /* 80: 4 2 */
u16 restore:1; /* 80: 5 2 */
u16 retpoline_safe:1; /* 80: 6 2 */
u16 noendbr:1; /* 80: 7 2 */
u16 entry:1; /* 80: 8 2 */
/* XXX 7 bits hole, try to pack */
s8 instr; /* 82 1 */
u8 visited; /* 83 1 */
/* XXX 4 bytes hole, try to pack */
struct alt_group * alt_group; /* 88 8 */
struct symbol * call_dest; /* 96 8 */
struct instruction * jump_dest; /* 104 8 */
struct instruction * first_jump_src; /* 112 8 */
struct reloc * jump_table; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
struct reloc * reloc; /* 128 8 */
struct list_head alts; /* 136 16 */
struct symbol * sym; /* 152 8 */
- struct list_head stack_ops; /* 160 16 */
- struct cfi_state * cfi; /* 176 8 */
+ struct stack_op * stack_ops; /* 160 8 */
+ struct cfi_state * cfi; /* 168 8 */
- /* size: 184, cachelines: 3, members: 29 */
- /* sum members: 178, holes: 1, sum holes: 4 */
+ /* size: 176, cachelines: 3, members: 29 */
+ /* sum members: 170, holes: 1, sum holes: 4 */
/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
- /* last cacheline: 56 bytes */
+ /* last cacheline: 48 bytes */
};
pre: 5:58.22 real, 226.69 user, 131.22 sys, 26221520 mem
post: 5:58.50 real, 229.64 user, 128.65 sys, 26221520 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.362196959@infradead.org
2023-02-08 17:17:58 +00:00
|
|
|
insn->stack_ops = NULL;
|
2021-06-24 09:41:02 +00:00
|
|
|
}
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
static int annotate_call_site(struct objtool_file *file,
|
2021-10-26 12:01:34 +00:00
|
|
|
struct instruction *insn, bool sibling)
|
2021-06-24 09:41:02 +00:00
|
|
|
{
|
|
|
|
struct reloc *reloc = insn_reloc(file, insn);
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
struct symbol *sym = insn_call_dest(insn);
|
2021-06-24 09:41:02 +00:00
|
|
|
|
2021-10-26 12:01:34 +00:00
|
|
|
if (!sym)
|
|
|
|
sym = reloc->sym;
|
|
|
|
|
|
|
|
if (sym->static_call_tramp) {
|
|
|
|
list_add_tail(&insn->call_node, &file->static_call_list);
|
2025-03-24 21:55:59 +00:00
|
|
|
return 0;
|
2021-06-24 09:41:02 +00:00
|
|
|
}
|
|
|
|
|
2021-10-26 12:01:36 +00:00
|
|
|
if (sym->retpoline_thunk) {
|
|
|
|
list_add_tail(&insn->call_node, &file->retpoline_call_list);
|
2025-03-24 21:55:59 +00:00
|
|
|
return 0;
|
2021-10-26 12:01:36 +00:00
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:02 +00:00
|
|
|
/*
|
2021-11-30 11:44:31 +00:00
|
|
|
* Many compilers cannot disable KCOV or sanitizer calls with a function
|
|
|
|
* attribute so they need a little help, NOP out any such calls from
|
|
|
|
* noinstr text.
|
2021-06-24 09:41:02 +00:00
|
|
|
*/
|
2022-04-18 16:50:40 +00:00
|
|
|
if (opts.hack_noinstr && insn->sec->noinstr && sym->profiling_func) {
|
2023-05-30 17:21:12 +00:00
|
|
|
if (reloc)
|
|
|
|
set_reloc_type(file->elf, reloc, R_NONE);
|
2021-06-24 09:41:02 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
if (elf_write_insn(file->elf, insn->sec,
|
|
|
|
insn->offset, insn->len,
|
|
|
|
sibling ? arch_ret_insn(insn->len)
|
|
|
|
: arch_nop_insn(insn->len))) {
|
|
|
|
return -1;
|
|
|
|
}
|
2021-06-24 09:41:02 +00:00
|
|
|
|
|
|
|
insn->type = sibling ? INSN_RETURN : INSN_NOP;
|
2022-03-23 22:35:01 +00:00
|
|
|
|
|
|
|
if (sibling) {
|
|
|
|
/*
|
|
|
|
* We've replaced the tail-call JMP insn by two new
|
|
|
|
* insn: RET; INT3, except we only have a single struct
|
|
|
|
* insn here. Mark it retpoline_safe to avoid the SLS
|
|
|
|
* warning, instead of adding another insn.
|
|
|
|
*/
|
|
|
|
insn->retpoline_safe = true;
|
|
|
|
}
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
return 0;
|
2021-06-24 09:41:02 +00:00
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:26 +00:00
|
|
|
if (opts.mcount && sym->fentry) {
|
2021-06-24 09:41:02 +00:00
|
|
|
if (sibling)
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "tail call to __fentry__ !?!?");
|
2022-11-14 17:57:49 +00:00
|
|
|
if (opts.mnop) {
|
2023-05-30 17:21:12 +00:00
|
|
|
if (reloc)
|
|
|
|
set_reloc_type(file->elf, reloc, R_NONE);
|
2021-06-24 09:41:02 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
if (elf_write_insn(file->elf, insn->sec,
|
|
|
|
insn->offset, insn->len,
|
|
|
|
arch_nop_insn(insn->len))) {
|
|
|
|
return -1;
|
|
|
|
}
|
2021-06-24 09:41:02 +00:00
|
|
|
|
2022-11-14 17:57:49 +00:00
|
|
|
insn->type = INSN_NOP;
|
|
|
|
}
|
2021-06-24 09:41:02 +00:00
|
|
|
|
2021-10-26 12:01:35 +00:00
|
|
|
list_add_tail(&insn->call_node, &file->mcount_loc_list);
|
2025-03-24 21:55:59 +00:00
|
|
|
return 0;
|
2021-06-24 09:41:02 +00:00
|
|
|
}
|
2022-03-08 15:30:49 +00:00
|
|
|
|
2025-02-07 12:15:35 +00:00
|
|
|
if (insn->type == INSN_CALL && !insn->sec->init &&
|
|
|
|
!insn->_call_dest->embedded_insn)
|
2022-09-15 11:11:09 +00:00
|
|
|
list_add_tail(&insn->call_node, &file->call_list);
|
|
|
|
|
2022-03-08 15:30:49 +00:00
|
|
|
if (!sibling && dead_end_function(file, sym))
|
|
|
|
insn->dead_end = true;
|
2025-03-24 21:55:59 +00:00
|
|
|
|
|
|
|
return 0;
|
2021-10-26 12:01:34 +00:00
|
|
|
}
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
/*
 * Record 'dest' as the call destination of 'insn' and annotate the call site.
 *
 * 'dest' may be NULL (unresolved destination); in that case only the union
 * member is cleared and no further processing happens.  'sibling' marks a
 * tail-call (JMP to another function) rather than a regular CALL.
 *
 * Returns 0 on success, or the error from annotate_call_site().
 */
static int add_call_dest(struct objtool_file *file, struct instruction *insn,
			 struct symbol *dest, bool sibling)
{
	/* _call_dest is unioned with _jump_table; valid because this is a call */
	insn->_call_dest = dest;
	if (!dest)
		return 0;

	/*
	 * Whatever stack impact regular CALLs have, should be undone
	 * by the RETURN of the called function.
	 *
	 * Annotated intra-function calls retain the stack_ops but
	 * are converted to JUMP, see read_intra_function_calls().
	 */
	remove_insn_ops(insn);

	return annotate_call_site(file, insn, sibling);
}
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
static int add_retpoline_call(struct objtool_file *file, struct instruction *insn)
|
2021-10-26 12:01:36 +00:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Retpoline calls/jumps are really dynamic calls/jumps in disguise,
|
|
|
|
* so convert them accordingly.
|
|
|
|
*/
|
|
|
|
switch (insn->type) {
|
|
|
|
case INSN_CALL:
|
|
|
|
insn->type = INSN_CALL_DYNAMIC;
|
|
|
|
break;
|
|
|
|
case INSN_JUMP_UNCONDITIONAL:
|
|
|
|
insn->type = INSN_JUMP_DYNAMIC;
|
|
|
|
break;
|
|
|
|
case INSN_JUMP_CONDITIONAL:
|
|
|
|
insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
|
|
|
|
break;
|
|
|
|
default:
|
2025-03-24 21:55:59 +00:00
|
|
|
return 0;
|
2021-10-26 12:01:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
insn->retpoline_safe = true;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Whatever stack impact regular CALLs have, should be undone
|
|
|
|
* by the RETURN of the called function.
|
|
|
|
*
|
|
|
|
* Annotated intra-function calls retain the stack_ops but
|
|
|
|
* are converted to JUMP, see read_intra_function_calls().
|
|
|
|
*/
|
|
|
|
remove_insn_ops(insn);
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
return annotate_call_site(file, insn, false);
|
2021-10-26 12:01:36 +00:00
|
|
|
}
|
2022-03-08 15:30:54 +00:00
|
|
|
|
2022-06-14 21:15:48 +00:00
|
|
|
/*
 * Convert a tail-call to the return thunk into a plain RETURN instruction.
 *
 * If 'add' is true the instruction is also queued on the file's
 * return_thunk_list (callers pass false when the site was already handled,
 * e.g. the embedded-insn special case in add_jump_destinations()).
 */
static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
{
	/*
	 * Return thunk tail calls are really just returns in disguise,
	 * so convert them accordingly.
	 */
	insn->type = INSN_RETURN;
	insn->retpoline_safe = true;

	if (add)
		list_add_tail(&insn->call_node, &file->return_thunk_list);
}
|
|
|
|
|
2022-07-11 09:49:50 +00:00
|
|
|
static bool is_first_func_insn(struct objtool_file *file,
|
|
|
|
struct instruction *insn, struct symbol *sym)
|
2022-03-08 15:30:54 +00:00
|
|
|
{
|
2022-07-11 09:49:50 +00:00
|
|
|
if (insn->offset == sym->offset)
|
2022-03-22 11:33:31 +00:00
|
|
|
return true;
|
|
|
|
|
2022-07-11 09:49:50 +00:00
|
|
|
/* Allow direct CALL/JMP past ENDBR */
|
2022-04-18 16:50:26 +00:00
|
|
|
if (opts.ibt) {
|
2022-03-22 11:33:31 +00:00
|
|
|
struct instruction *prev = prev_insn_same_sym(file, insn);
|
|
|
|
|
|
|
|
if (prev && prev->type == INSN_ENDBR &&
|
2022-07-11 09:49:50 +00:00
|
|
|
insn->offset == sym->offset + prev->len)
|
2022-03-22 11:33:31 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2022-03-08 15:30:54 +00:00
|
|
|
}
|
|
|
|
|
2022-07-11 09:49:50 +00:00
|
|
|
/*
|
|
|
|
* A sibling call is a tail-call to another symbol -- to differentiate from a
|
|
|
|
* recursive tail-call which is to the same symbol.
|
|
|
|
*/
|
|
|
|
static bool jump_is_sibling_call(struct objtool_file *file,
|
|
|
|
struct instruction *from, struct instruction *to)
|
|
|
|
{
|
|
|
|
struct symbol *fs = from->sym;
|
|
|
|
struct symbol *ts = to->sym;
|
|
|
|
|
|
|
|
/* Not a sibling call if from/to a symbol hole */
|
|
|
|
if (!fs || !ts)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Not a sibling call if not targeting the start of a symbol. */
|
|
|
|
if (!is_first_func_insn(file, to, ts))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Disallow sibling calls into STT_NOTYPE */
|
|
|
|
if (ts->type == STT_NOTYPE)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Must not be self to be a sibling */
|
|
|
|
return fs->pfunc != ts->pfunc;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
|
|
|
|
* Find the destination instructions for all jumps.
|
|
|
|
*/
|
|
|
|
static int add_jump_destinations(struct objtool_file *file)
|
|
|
|
{
|
2022-04-11 23:10:30 +00:00
|
|
|
struct instruction *insn, *jump_dest;
|
objtool: Rename rela to reloc
Before supporting additional relocation types rename the relevant
types and functions from "rela" to "reloc". This work be done with
the following regex:
sed -e 's/struct rela/struct reloc/g' \
-e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
-e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
-e 's/relasec/relocsec/g' \
-e 's/rela_list/reloc_list/g' \
-e 's/rela_hash/reloc_hash/g' \
-e 's/add_rela/add_reloc/g' \
-e 's/rela->/reloc->/g' \
-e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
-e 's/rela =/reloc =/g' \
-e 's/relas =/relocs =/g' \
-e 's/relas\[/relocs[/g' \
-e 's/relaname =/relocname =/g' \
-e 's/= rela\;/= reloc\;/g' \
-e 's/= relas\;/= relocs\;/g' \
-e 's/= relaname\;/= relocname\;/g' \
-e 's/, rela)/, reloc)/g' \
-e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \
-e 's/ rela$/ reloc/g' \
-e 's/, relaname/, relocname/g' \
-e 's/sec->rela/sec->reloc/g' \
-e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
-i \
arch.h \
arch/x86/decode.c \
check.c \
check.h \
elf.c \
elf.h \
orc_gen.c \
special.c
Notable exceptions which complicate the regex include gelf_*
library calls and standard/expected section names which still use
"rela" because they encode the type of relocation expected. Also, keep
"rela" in the struct because it encodes a specific type of relocation
we currently expect.
It will eventually turn into a member of an anonymous union when a
susequent patch adds implicit addend, or "rel", relocation support.
Signed-off-by: Matt Helsley <mhelsley@vmware.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-05-29 21:01:13 +00:00
|
|
|
struct reloc *reloc;
|
2017-06-28 15:11:05 +00:00
|
|
|
struct section *dest_sec;
|
|
|
|
unsigned long dest_off;
|
2025-03-24 21:55:59 +00:00
|
|
|
int ret;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
for_each_insn(file, insn) {
|
2025-04-01 04:26:37 +00:00
|
|
|
struct symbol *func = insn_func(insn);
|
|
|
|
|
2022-04-11 23:10:31 +00:00
|
|
|
if (insn->jump_dest) {
|
|
|
|
/*
|
|
|
|
* handle_group_alt() may have previously set
|
|
|
|
* 'jump_dest' for some alternatives.
|
|
|
|
*/
|
|
|
|
continue;
|
|
|
|
}
|
2020-02-10 18:32:39 +00:00
|
|
|
if (!is_static_jump(insn))
|
2017-06-28 15:11:05 +00:00
|
|
|
continue;
|
|
|
|
|
2021-03-26 15:12:13 +00:00
|
|
|
reloc = insn_reloc(file, insn);
|
objtool: Rename rela to reloc
Before supporting additional relocation types rename the relevant
types and functions from "rela" to "reloc". This work be done with
the following regex:
sed -e 's/struct rela/struct reloc/g' \
-e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
-e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
-e 's/relasec/relocsec/g' \
-e 's/rela_list/reloc_list/g' \
-e 's/rela_hash/reloc_hash/g' \
-e 's/add_rela/add_reloc/g' \
-e 's/rela->/reloc->/g' \
-e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
-e 's/rela =/reloc =/g' \
-e 's/relas =/relocs =/g' \
-e 's/relas\[/relocs[/g' \
-e 's/relaname =/relocname =/g' \
-e 's/= rela\;/= reloc\;/g' \
-e 's/= relas\;/= relocs\;/g' \
-e 's/= relaname\;/= relocname\;/g' \
-e 's/, rela)/, reloc)/g' \
-e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \
-e 's/ rela$/ reloc/g' \
-e 's/, relaname/, relocname/g' \
-e 's/sec->rela/sec->reloc/g' \
-e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
-i \
arch.h \
arch/x86/decode.c \
check.c \
check.h \
elf.c \
elf.h \
orc_gen.c \
special.c
Notable exceptions which complicate the regex include gelf_*
library calls and standard/expected section names which still use
"rela" because they encode the type of relocation expected. Also, keep
"rela" in the struct because it encodes a specific type of relocation
we currently expect.
It will eventually turn into a member of an anonymous union when a
susequent patch adds implicit addend, or "rel", relocation support.
Signed-off-by: Matt Helsley <mhelsley@vmware.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-05-29 21:01:13 +00:00
|
|
|
if (!reloc) {
|
2017-06-28 15:11:05 +00:00
|
|
|
dest_sec = insn->sec;
|
2020-03-27 15:28:45 +00:00
|
|
|
dest_off = arch_jump_destination(insn);
|
objtool: Rename rela to reloc
Before supporting additional relocation types rename the relevant
types and functions from "rela" to "reloc". This work be done with
the following regex:
sed -e 's/struct rela/struct reloc/g' \
-e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
-e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
-e 's/relasec/relocsec/g' \
-e 's/rela_list/reloc_list/g' \
-e 's/rela_hash/reloc_hash/g' \
-e 's/add_rela/add_reloc/g' \
-e 's/rela->/reloc->/g' \
-e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
-e 's/rela =/reloc =/g' \
-e 's/relas =/relocs =/g' \
-e 's/relas\[/relocs[/g' \
-e 's/relaname =/relocname =/g' \
-e 's/= rela\;/= reloc\;/g' \
-e 's/= relas\;/= relocs\;/g' \
-e 's/= relaname\;/= relocname\;/g' \
-e 's/, rela)/, reloc)/g' \
-e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \
-e 's/ rela$/ reloc/g' \
-e 's/, relaname/, relocname/g' \
-e 's/sec->rela/sec->reloc/g' \
-e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
-i \
arch.h \
arch/x86/decode.c \
check.c \
check.h \
elf.c \
elf.h \
orc_gen.c \
special.c
Notable exceptions which complicate the regex include gelf_*
library calls and standard/expected section names which still use
"rela" because they encode the type of relocation expected. Also, keep
"rela" in the struct because it encodes a specific type of relocation
we currently expect.
It will eventually turn into a member of an anonymous union when a
susequent patch adds implicit addend, or "rel", relocation support.
Signed-off-by: Matt Helsley <mhelsley@vmware.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-05-29 21:01:13 +00:00
|
|
|
} else if (reloc->sym->type == STT_SECTION) {
|
|
|
|
dest_sec = reloc->sym->sec;
|
2023-05-30 17:21:08 +00:00
|
|
|
dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
|
2021-10-26 12:01:33 +00:00
|
|
|
} else if (reloc->sym->retpoline_thunk) {
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_retpoline_call(file, insn);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2018-01-11 21:46:23 +00:00
|
|
|
continue;
|
2022-06-14 21:15:38 +00:00
|
|
|
} else if (reloc->sym->return_thunk) {
|
2022-06-14 21:15:48 +00:00
|
|
|
add_return_call(file, insn, true);
|
2022-06-14 21:15:38 +00:00
|
|
|
continue;
|
2025-04-01 04:26:37 +00:00
|
|
|
} else if (func) {
|
2022-04-11 23:10:30 +00:00
|
|
|
/*
|
|
|
|
* External sibling call or internal sibling call with
|
|
|
|
* STT_FUNC reloc.
|
|
|
|
*/
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_call_dest(file, insn, reloc->sym, true);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2017-06-28 15:11:05 +00:00
|
|
|
continue;
|
2021-01-21 21:29:22 +00:00
|
|
|
} else if (reloc->sym->sec->idx) {
|
|
|
|
dest_sec = reloc->sym->sec;
|
|
|
|
dest_off = reloc->sym->sym.st_value +
|
2023-05-30 17:21:08 +00:00
|
|
|
arch_dest_reloc_offset(reloc_addend(reloc));
|
2021-01-21 21:29:22 +00:00
|
|
|
} else {
|
|
|
|
/* non-func asm code jumping to another file */
|
|
|
|
continue;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2022-04-11 23:10:30 +00:00
|
|
|
jump_dest = find_insn(file, dest_sec, dest_off);
|
|
|
|
if (!jump_dest) {
|
2022-06-14 21:15:48 +00:00
|
|
|
struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
|
|
|
|
|
|
|
|
/*
|
2023-08-14 11:44:32 +00:00
|
|
|
* This is a special case for retbleed_untrain_ret().
|
2022-06-14 21:15:48 +00:00
|
|
|
* It jumps to __x86_return_thunk(), but objtool
|
|
|
|
* can't find the thunk's starting RET
|
|
|
|
* instruction, because the RET is also in the
|
|
|
|
* middle of another instruction. Objtool only
|
|
|
|
* knows about the outer instruction.
|
|
|
|
*/
|
2023-08-14 11:44:29 +00:00
|
|
|
if (sym && sym->embedded_insn) {
|
2022-06-14 21:15:48 +00:00
|
|
|
add_return_call(file, insn, false);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2025-04-01 04:26:37 +00:00
|
|
|
/*
|
|
|
|
* GCOV/KCOV dead code can jump to the end of the
|
|
|
|
* function/section.
|
|
|
|
*/
|
|
|
|
if (file->ignore_unreachables && func &&
|
|
|
|
dest_sec == insn->sec &&
|
|
|
|
dest_off == func->offset + func->len)
|
|
|
|
continue;
|
|
|
|
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "can't find jump dest instruction at %s+0x%lx",
|
|
|
|
dest_sec->name, dest_off);
|
2017-06-28 15:11:05 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2018-06-01 12:23:51 +00:00
|
|
|
|
2023-10-12 02:47:37 +00:00
|
|
|
/*
|
|
|
|
* An intra-TU jump in retpoline.o might not have a relocation
|
|
|
|
* for its jump dest, in which case the above
|
|
|
|
* add_{retpoline,return}_call() didn't happen.
|
|
|
|
*/
|
|
|
|
if (jump_dest->sym && jump_dest->offset == jump_dest->sym->offset) {
|
|
|
|
if (jump_dest->sym->retpoline_thunk) {
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_retpoline_call(file, insn);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2023-10-12 02:47:37 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (jump_dest->sym->return_thunk) {
|
|
|
|
add_return_call(file, insn, true);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-01 12:23:51 +00:00
|
|
|
/*
|
2019-03-06 11:58:15 +00:00
|
|
|
* Cross-function jump.
|
2018-06-01 12:23:51 +00:00
|
|
|
*/
|
2025-04-01 04:26:37 +00:00
|
|
|
if (func && insn_func(jump_dest) && func != insn_func(jump_dest)) {
|
2019-03-06 11:58:15 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* For GCC 8+, create parent/child links for any cold
|
|
|
|
* subfunctions. This is _mostly_ redundant with a
|
|
|
|
* similar initialization in read_symbols().
|
|
|
|
*
|
|
|
|
* If a function has aliases, we want the *first* such
|
|
|
|
* function in the symbol table to be the subfunction's
|
|
|
|
* parent. In that case we overwrite the
|
|
|
|
* initialization done in read_symbols().
|
|
|
|
*
|
|
|
|
* However this code can't completely replace the
|
|
|
|
* read_symbols() code because this doesn't detect the
|
|
|
|
* case where the parent function's only reference to a
|
2019-07-18 01:36:53 +00:00
|
|
|
* subfunction is through a jump table.
|
2019-03-06 11:58:15 +00:00
|
|
|
*/
|
2025-04-01 04:26:37 +00:00
|
|
|
if (!strstr(func->name, ".cold") &&
|
2022-09-22 20:03:50 +00:00
|
|
|
strstr(insn_func(jump_dest)->name, ".cold")) {
|
2025-04-01 04:26:37 +00:00
|
|
|
func->cfunc = insn_func(jump_dest);
|
|
|
|
insn_func(jump_dest)->pfunc = func;
|
2019-03-06 11:58:15 +00:00
|
|
|
}
|
2018-06-01 12:23:51 +00:00
|
|
|
}
|
2022-04-11 23:10:30 +00:00
|
|
|
|
2022-07-11 09:49:50 +00:00
|
|
|
if (jump_is_sibling_call(file, insn, jump_dest)) {
|
|
|
|
/*
|
|
|
|
* Internal sibling call without reloc or with
|
|
|
|
* STT_SECTION reloc.
|
|
|
|
*/
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_call_dest(file, insn, insn_func(jump_dest), true);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2022-07-11 09:49:50 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-04-11 23:10:30 +00:00
|
|
|
insn->jump_dest = jump_dest;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-09-15 07:53:18 +00:00
|
|
|
/*
 * Find the symbol a call at (sec, offset) targets: prefer an STT_FUNC
 * symbol at that offset, falling back to any symbol covering it.
 * Returns NULL if no symbol is found.
 */
static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
{
	struct symbol *call_dest;

	call_dest = find_func_by_offset(sec, offset);
	if (!call_dest)
		call_dest = find_symbol_by_offset(sec, offset);

	return call_dest;
}
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
|
|
|
|
* Find the destination instructions for all calls.
|
|
|
|
*/
|
|
|
|
static int add_call_destinations(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
unsigned long dest_off;
|
2021-06-24 09:41:02 +00:00
|
|
|
struct symbol *dest;
|
objtool: Rename rela to reloc
Before supporting additional relocation types rename the relevant
types and functions from "rela" to "reloc". This work be done with
the following regex:
sed -e 's/struct rela/struct reloc/g' \
-e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
-e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
-e 's/relasec/relocsec/g' \
-e 's/rela_list/reloc_list/g' \
-e 's/rela_hash/reloc_hash/g' \
-e 's/add_rela/add_reloc/g' \
-e 's/rela->/reloc->/g' \
-e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
-e 's/rela =/reloc =/g' \
-e 's/relas =/relocs =/g' \
-e 's/relas\[/relocs[/g' \
-e 's/relaname =/relocname =/g' \
-e 's/= rela\;/= reloc\;/g' \
-e 's/= relas\;/= relocs\;/g' \
-e 's/= relaname\;/= relocname\;/g' \
-e 's/, rela)/, reloc)/g' \
-e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \
-e 's/ rela$/ reloc/g' \
-e 's/, relaname/, relocname/g' \
-e 's/sec->rela/sec->reloc/g' \
-e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
-i \
arch.h \
arch/x86/decode.c \
check.c \
check.h \
elf.c \
elf.h \
orc_gen.c \
special.c
Notable exceptions which complicate the regex include gelf_*
library calls and standard/expected section names which still use
"rela" because they encode the type of relocation expected. Also, keep
"rela" in the struct because it encodes a specific type of relocation
we currently expect.
It will eventually turn into a member of an anonymous union when a
susequent patch adds implicit addend, or "rel", relocation support.
Signed-off-by: Matt Helsley <mhelsley@vmware.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-05-29 21:01:13 +00:00
|
|
|
struct reloc *reloc;
|
2025-03-24 21:55:59 +00:00
|
|
|
int ret;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
for_each_insn(file, insn) {
|
2025-03-24 21:55:53 +00:00
|
|
|
struct symbol *func = insn_func(insn);
|
2017-06-28 15:11:05 +00:00
|
|
|
if (insn->type != INSN_CALL)
|
|
|
|
continue;
|
|
|
|
|
2021-03-26 15:12:13 +00:00
|
|
|
reloc = insn_reloc(file, insn);
|
objtool: Rename rela to reloc
Before supporting additional relocation types rename the relevant
types and functions from "rela" to "reloc". This work be done with
the following regex:
sed -e 's/struct rela/struct reloc/g' \
-e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
-e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
-e 's/relasec/relocsec/g' \
-e 's/rela_list/reloc_list/g' \
-e 's/rela_hash/reloc_hash/g' \
-e 's/add_rela/add_reloc/g' \
-e 's/rela->/reloc->/g' \
-e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
-e 's/rela =/reloc =/g' \
-e 's/relas =/relocs =/g' \
-e 's/relas\[/relocs[/g' \
-e 's/relaname =/relocname =/g' \
-e 's/= rela\;/= reloc\;/g' \
-e 's/= relas\;/= relocs\;/g' \
-e 's/= relaname\;/= relocname\;/g' \
-e 's/, rela)/, reloc)/g' \
-e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \
-e 's/ rela$/ reloc/g' \
-e 's/, relaname/, relocname/g' \
-e 's/sec->rela/sec->reloc/g' \
-e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
-i \
arch.h \
arch/x86/decode.c \
check.c \
check.h \
elf.c \
elf.h \
orc_gen.c \
special.c
Notable exceptions which complicate the regex include gelf_*
library calls and standard/expected section names which still use
"rela" because they encode the type of relocation expected. Also, keep
"rela" in the struct because it encodes a specific type of relocation
we currently expect.
It will eventually turn into a member of an anonymous union when a
susequent patch adds implicit addend, or "rel", relocation support.
Signed-off-by: Matt Helsley <mhelsley@vmware.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-05-29 21:01:13 +00:00
|
|
|
if (!reloc) {
|
2020-03-27 15:28:45 +00:00
|
|
|
dest_off = arch_jump_destination(insn);
|
2021-06-24 09:41:02 +00:00
|
|
|
dest = find_call_destination(insn->sec, dest_off);
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_call_dest(file, insn, dest, false);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2018-01-30 04:00:39 +00:00
|
|
|
|
2025-03-24 21:55:53 +00:00
|
|
|
if (func && func->ignore)
|
2020-02-18 03:41:54 +00:00
|
|
|
continue;
|
|
|
|
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
if (!insn_call_dest(insn)) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "unannotated intra-function call");
|
2017-06-28 15:11:05 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2018-01-30 04:00:39 +00:00
|
|
|
|
2025-03-24 21:55:53 +00:00
|
|
|
if (func && insn_call_dest(insn)->type != STT_FUNC) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "unsupported call to non-function");
|
2020-02-18 03:41:54 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
objtool: Rename rela to reloc
Before supporting additional relocation types rename the relevant
types and functions from "rela" to "reloc". This work be done with
the following regex:
sed -e 's/struct rela/struct reloc/g' \
-e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
-e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
-e 's/relasec/relocsec/g' \
-e 's/rela_list/reloc_list/g' \
-e 's/rela_hash/reloc_hash/g' \
-e 's/add_rela/add_reloc/g' \
-e 's/rela->/reloc->/g' \
-e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
-e 's/rela =/reloc =/g' \
-e 's/relas =/relocs =/g' \
-e 's/relas\[/relocs[/g' \
-e 's/relaname =/relocname =/g' \
-e 's/= rela\;/= reloc\;/g' \
-e 's/= relas\;/= relocs\;/g' \
-e 's/= relaname\;/= relocname\;/g' \
-e 's/, rela)/, reloc)/g' \
-e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \
-e 's/ rela$/ reloc/g' \
-e 's/, relaname/, relocname/g' \
-e 's/sec->rela/sec->reloc/g' \
-e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
-i \
arch.h \
arch/x86/decode.c \
check.c \
check.h \
elf.c \
elf.h \
orc_gen.c \
special.c
Notable exceptions which complicate the regex include gelf_*
library calls and standard/expected section names which still use
"rela" because they encode the type of relocation expected. Also, keep
"rela" in the struct because it encodes a specific type of relocation
we currently expect.
It will eventually turn into a member of an anonymous union when a
susequent patch adds implicit addend, or "rel", relocation support.
Signed-off-by: Matt Helsley <mhelsley@vmware.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-05-29 21:01:13 +00:00
|
|
|
} else if (reloc->sym->type == STT_SECTION) {
|
2023-05-30 17:21:08 +00:00
|
|
|
dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
|
2021-06-24 09:41:02 +00:00
|
|
|
dest = find_call_destination(reloc->sym->sec, dest_off);
|
|
|
|
if (!dest) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "can't find call dest symbol at %s+0x%lx",
|
|
|
|
reloc->sym->sec->name, dest_off);
|
2017-06-28 15:11:05 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2021-03-26 15:12:03 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_call_dest(file, insn, dest, false);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2021-06-24 09:41:02 +00:00
|
|
|
|
2021-10-26 12:01:33 +00:00
|
|
|
} else if (reloc->sym->retpoline_thunk) {
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_retpoline_call(file, insn);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2021-03-26 15:12:03 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
} else {
|
|
|
|
ret = add_call_dest(file, insn, reloc->sym, false);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
* The .alternatives section requires some extra special care over and above
|
|
|
|
* other special sections because alternatives are patched in place.
|
2017-06-28 15:11:05 +00:00
|
|
|
*/
|
|
|
|
static int handle_group_alt(struct objtool_file *file,
|
|
|
|
struct special_alt *special_alt,
|
|
|
|
struct instruction *orig_insn,
|
|
|
|
struct instruction **new_insn)
|
|
|
|
{
|
2023-02-08 17:18:03 +00:00
|
|
|
struct instruction *last_new_insn = NULL, *insn, *nop = NULL;
|
2020-12-18 20:19:32 +00:00
|
|
|
struct alt_group *orig_alt_group, *new_alt_group;
|
2017-06-28 15:11:05 +00:00
|
|
|
unsigned long dest_off;
|
|
|
|
|
2023-02-08 17:18:03 +00:00
|
|
|
orig_alt_group = orig_insn->alt_group;
|
2020-12-18 20:19:32 +00:00
|
|
|
if (!orig_alt_group) {
|
2023-02-08 17:18:03 +00:00
|
|
|
struct instruction *last_orig_insn = NULL;
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
orig_alt_group = calloc(1, sizeof(*orig_alt_group));
|
2023-02-08 17:18:03 +00:00
|
|
|
if (!orig_alt_group) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("calloc");
|
2023-02-08 17:18:03 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
orig_alt_group->cfi = calloc(special_alt->orig_len,
|
|
|
|
sizeof(struct cfi_state *));
|
|
|
|
if (!orig_alt_group->cfi) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("calloc");
|
2023-02-08 17:18:03 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2023-02-08 17:18:03 +00:00
|
|
|
insn = orig_insn;
|
|
|
|
sec_for_each_insn_from(file, insn) {
|
|
|
|
if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
|
|
|
|
break;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2023-02-08 17:18:03 +00:00
|
|
|
insn->alt_group = orig_alt_group;
|
|
|
|
last_orig_insn = insn;
|
|
|
|
}
|
|
|
|
orig_alt_group->orig_group = NULL;
|
|
|
|
orig_alt_group->first_insn = orig_insn;
|
|
|
|
orig_alt_group->last_insn = last_orig_insn;
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
orig_alt_group->nop = NULL;
|
2025-03-24 21:55:54 +00:00
|
|
|
orig_alt_group->ignore = orig_insn->ignore_alts;
|
2023-02-08 17:18:03 +00:00
|
|
|
} else {
|
|
|
|
if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
|
|
|
|
orig_alt_group->first_insn->offset != special_alt->orig_len) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(orig_insn, "weirdly overlapping alternative! %ld != %d",
|
|
|
|
orig_alt_group->last_insn->offset +
|
|
|
|
orig_alt_group->last_insn->len -
|
|
|
|
orig_alt_group->first_insn->offset,
|
|
|
|
special_alt->orig_len);
|
2023-02-08 17:18:03 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
2018-01-30 04:00:40 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
new_alt_group = calloc(1, sizeof(*new_alt_group));
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
if (!new_alt_group) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("calloc");
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
return -1;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
if (special_alt->new_len < special_alt->orig_len) {
|
|
|
|
/*
|
|
|
|
* Insert a fake nop at the end to make the replacement
|
|
|
|
* alt_group the same size as the original. This is needed to
|
|
|
|
* allow propagate_alt_cfi() to do its magic. When the last
|
|
|
|
* instruction affects the stack, the instruction after it (the
|
|
|
|
* nop) will propagate the new state to the shared CFI array.
|
|
|
|
*/
|
2025-03-24 21:55:59 +00:00
|
|
|
nop = calloc(1, sizeof(*nop));
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
if (!nop) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("calloc");
|
2018-01-30 04:00:40 +00:00
|
|
|
return -1;
|
|
|
|
}
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
memset(nop, 0, sizeof(*nop));
|
|
|
|
|
|
|
|
nop->sec = special_alt->new_sec;
|
|
|
|
nop->offset = special_alt->new_off + special_alt->new_len;
|
|
|
|
nop->len = special_alt->orig_len - special_alt->new_len;
|
|
|
|
nop->type = INSN_NOP;
|
2022-09-22 20:03:50 +00:00
|
|
|
nop->sym = orig_insn->sym;
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
nop->alt_group = new_alt_group;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
2018-01-30 04:00:40 +00:00
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
if (!special_alt->new_len) {
|
|
|
|
*new_insn = nop;
|
|
|
|
goto end;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
insn = *new_insn;
|
|
|
|
sec_for_each_insn_from(file, insn) {
|
2020-09-04 15:30:23 +00:00
|
|
|
struct reloc *alt_reloc;
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
if (insn->offset >= special_alt->new_off + special_alt->new_len)
|
|
|
|
break;
|
|
|
|
|
|
|
|
last_new_insn = insn;
|
|
|
|
|
2022-09-22 20:03:50 +00:00
|
|
|
insn->sym = orig_insn->sym;
|
2020-12-18 20:19:32 +00:00
|
|
|
insn->alt_group = new_alt_group;
|
2018-01-30 04:00:39 +00:00
|
|
|
|
2020-02-10 18:32:40 +00:00
|
|
|
/*
|
|
|
|
* Since alternative replacement code is copy/pasted by the
|
|
|
|
* kernel after applying relocations, generally such code can't
|
|
|
|
* have relative-address relocation references to outside the
|
|
|
|
* .altinstr_replacement section, unless the arch's
|
|
|
|
* alternatives code can adjust the relative offsets
|
|
|
|
* accordingly.
|
|
|
|
*/
|
2021-03-26 15:12:13 +00:00
|
|
|
alt_reloc = insn_reloc(file, insn);
|
2022-09-15 11:11:07 +00:00
|
|
|
if (alt_reloc && arch_pc_relative_reloc(alt_reloc) &&
|
2020-09-04 15:30:23 +00:00
|
|
|
!arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
|
2020-02-10 18:32:40 +00:00
|
|
|
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "unsupported relocation in alternatives section");
|
2020-02-10 18:32:40 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-02-10 18:32:39 +00:00
|
|
|
if (!is_static_jump(insn))
|
2017-06-28 15:11:05 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!insn->immediate)
|
|
|
|
continue;
|
|
|
|
|
2020-03-27 15:28:45 +00:00
|
|
|
dest_off = arch_jump_destination(insn);
|
2022-04-11 23:10:31 +00:00
|
|
|
if (dest_off == special_alt->new_off + special_alt->new_len) {
|
2023-02-08 17:18:03 +00:00
|
|
|
insn->jump_dest = next_insn_same_sec(file, orig_alt_group->last_insn);
|
2022-04-11 23:10:31 +00:00
|
|
|
if (!insn->jump_dest) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "can't find alternative jump destination");
|
2022-04-11 23:10:31 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!last_new_insn) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_FUNC(special_alt->new_sec, special_alt->new_off,
|
|
|
|
"can't find last new alternative instruction");
|
2017-06-28 15:11:05 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x07, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
end:
|
2020-12-18 20:19:32 +00:00
|
|
|
new_alt_group->orig_group = orig_alt_group;
|
|
|
|
new_alt_group->first_insn = *new_insn;
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
new_alt_group->last_insn = last_new_insn;
|
|
|
|
new_alt_group->nop = nop;
|
2025-03-24 21:55:54 +00:00
|
|
|
new_alt_group->ignore = (*new_insn)->ignore_alts;
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
new_alt_group->cfi = orig_alt_group->cfi;
|
2017-06-28 15:11:05 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * A jump table entry can either convert a nop to a jump or a jump to a nop.
 * If the original instruction is a jump, make the alt entry an effective nop
 * by just skipping the original instruction.
 */
static int handle_jump_alt(struct objtool_file *file,
			   struct special_alt *special_alt,
			   struct instruction *orig_insn,
			   struct instruction **new_insn)
{
	/* Only a NOP or an unconditional jump can sit at a jump label site. */
	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL &&
	    orig_insn->type != INSN_NOP) {

		ERROR_INSN(orig_insn, "unsupported instruction at jump label");
		return -1;
	}

	/*
	 * Bit 1 of the key addend is set: with --hack-jump-label, rewrite the
	 * original instruction in the ELF file to a NOP and drop its
	 * relocation, so the site is a NOP at build time.
	 * NOTE(review): the exact meaning of the key-addend bit comes from the
	 * static-branch key encoding — confirm against static_call/jump_label
	 * headers.
	 */
	if (opts.hack_jump_label && special_alt->key_addend & 2) {
		struct reloc *reloc = insn_reloc(file, orig_insn);

		if (reloc)
			set_reloc_type(file->elf, reloc, R_NONE);

		/* Overwrite the instruction bytes with an arch-sized NOP. */
		if (elf_write_insn(file->elf, orig_insn->sec,
				   orig_insn->offset, orig_insn->len,
				   arch_nop_insn(orig_insn->len))) {
			return -1;
		}

		orig_insn->type = INSN_NOP;
	}

	/*
	 * Original is a NOP: nothing to redirect, just record stats.
	 * *new_insn is deliberately left alone (the alternative is "no-op").
	 */
	if (orig_insn->type == INSN_NOP) {
		if (orig_insn->len == 2)
			file->jl_nop_short++;
		else
			file->jl_nop_long++;

		return 0;
	}

	/* Original is a jump: record stats by encoding length. */
	if (orig_insn->len == 2)
		file->jl_short++;
	else
		file->jl_long++;

	/*
	 * The alternative for a patched-out jump is simply the instruction
	 * following it — i.e. skip the jump entirely.
	 */
	*new_insn = next_insn_same_sec(file, orig_insn);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Read all the special sections which have alternate instructions which can be
 * patched in or redirected to at runtime. Each instruction having alternate
 * instruction(s) has them added to its insn->alts list, which will be
 * traversed in validate_branch().
 */
static int add_special_section_alts(struct objtool_file *file)
{
	struct list_head special_alts;
	struct instruction *orig_insn, *new_insn;
	struct special_alt *special_alt, *tmp;
	struct alternative *alt;
	int ret;

	/* Parse .altinstructions / jump-label / exception-table entries. */
	if (special_get_alts(file->elf, &special_alts))
		return -1;

	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {

		orig_insn = find_insn(file, special_alt->orig_sec,
				      special_alt->orig_off);
		if (!orig_insn) {
			ERROR_FUNC(special_alt->orig_sec, special_alt->orig_off,
				   "special: can't find orig instruction");
			return -1;
		}

		/*
		 * A group alternative with an empty replacement has no "new"
		 * instruction; otherwise resolve it in the new section.
		 */
		new_insn = NULL;
		if (!special_alt->group || special_alt->new_len) {
			new_insn = find_insn(file, special_alt->new_sec,
					     special_alt->new_off);
			if (!new_insn) {
				ERROR_FUNC(special_alt->new_sec, special_alt->new_off,
					   "special: can't find new instruction");
				return -1;
			}
		}

		if (special_alt->group) {
			/* A zero-length original range is malformed; skip it. */
			if (!special_alt->orig_len) {
				ERROR_INSN(orig_insn, "empty alternative entry");
				continue;
			}

			/* May retarget new_insn at the group's first insn. */
			ret = handle_group_alt(file, special_alt, orig_insn,
					       &new_insn);
			if (ret)
				return ret;

		} else if (special_alt->jump_or_nop) {
			ret = handle_jump_alt(file, special_alt, orig_insn,
					      &new_insn);
			if (ret)
				return ret;
		}

		alt = calloc(1, sizeof(*alt));
		if (!alt) {
			ERROR_GLIBC("calloc");
			return -1;
		}

		/* Push onto the original insn's singly-linked alts list. */
		alt->insn = new_insn;
		alt->next = orig_insn->alts;
		orig_insn->alts = alt;

		list_del(&special_alt->list);
		free(special_alt);
	}

	/* Jump-label statistics gathered by handle_jump_alt() above. */
	if (opts.stats) {
		printf("jl\\\tNOP\tJMP\n");
		printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
		printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
	}

	return 0;
}
|
|
|
|
|
objtool: Handle PC relative relocation type
For the most part, an absolute relocation type is used for rodata.
In the case of STT_SECTION, reloc->sym->offset is always zero, for
the other symbol types, reloc_addend(reloc) is always zero, thus it
can use a simple statement "reloc->sym->offset + reloc_addend(reloc)"
to obtain the symbol offset for various symbol types.
When compiling on LoongArch, there exist PC relative relocation types
for rodata, it needs to calculate the symbol offset with "S + A - PC"
according to the spec of "ELF for the LoongArch Architecture".
If there is only one jump table in the rodata, the "PC" is the entry
address which is equal with the value of reloc_offset(reloc), at this
time, reloc_offset(table) is 0.
If there are many jump tables in the rodata, the "PC" is the offset
of the jump table's base address which is equal with the value of
reloc_offset(reloc) - reloc_offset(table).
So for LoongArch, if the relocation type is PC relative, it can use a
statement "reloc_offset(reloc) - reloc_offset(table)" to get the "PC"
value when calculating the symbol offset with "S + A - PC" for one or
many jump tables in the rodata.
Add an arch-specific function arch_jump_table_sym_offset() to assign
the symbol offset, for the most part that is an absolute relocation,
the default value is "reloc->sym->offset + reloc_addend(reloc)" in
the weak definition, it can be overridden by each architecture that
has different requirements.
Link: https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Link: https://lore.kernel.org/r/20250211115016.26913-4-yangtiezhu@loongson.cn
Acked-by: Huacai Chen <chenhuacai@loongson.cn>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2025-02-11 11:50:12 +00:00
|
|
|
/*
 * Default (absolute-relocation) symbol offset for a jump table entry: S + A.
 * Architectures with PC-relative jump table relocations (e.g. LoongArch)
 * override this weak definition to compute S + A - PC instead.
 */
__weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
{
	return reloc->sym->offset + reloc_addend(reloc);
}
|
|
|
|
|
2025-03-24 21:55:51 +00:00
|
|
|
/*
 * Walk the relocations of @insn's switch jump table and add every in-function
 * destination as an alternative branch target on @insn, so validate_branch()
 * can follow all paths of the indirect jump.
 */
static int add_jump_table(struct objtool_file *file, struct instruction *insn)
{
	unsigned long table_size = insn_jump_table_size(insn);
	struct symbol *pfunc = insn_func(insn)->pfunc;
	struct reloc *table = insn_jump_table(insn);
	struct instruction *dest_insn;
	unsigned int prev_offset = 0;	/* offset of the last accepted entry */
	struct reloc *reloc = table;
	struct alternative *alt;
	unsigned long sym_offset;

	/*
	 * Each @reloc is a switch table relocation which points to the target
	 * instruction.
	 */
	for_each_reloc_from(table->sec, reloc) {

		/* Check for the end of the table: */
		if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size)
			break;
		/* Hitting the head of another jump table also ends this one. */
		if (reloc != table && is_jump_table(reloc))
			break;

		/* Make sure the table entries are consecutive: */
		if (prev_offset && reloc_offset(reloc) != prev_offset + arch_reloc_size(reloc))
			break;

		/* Absolute by default; PC-relative on some arches. */
		sym_offset = arch_jump_table_sym_offset(reloc, table);

		/* Detect function pointers from contiguous objects: */
		if (reloc->sym->sec == pfunc->sec && sym_offset == pfunc->offset)
			break;

		/*
		 * Clang sometimes leaves dangling unused jump table entries
		 * which point to the end of the function. Ignore them.
		 */
		if (reloc->sym->sec == pfunc->sec &&
		    sym_offset == pfunc->offset + pfunc->len)
			goto next;

		dest_insn = find_insn(file, reloc->sym->sec, sym_offset);
		if (!dest_insn)
			break;

		/* Make sure the destination is in the same function: */
		if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc)
			break;

		alt = calloc(1, sizeof(*alt));
		if (!alt) {
			ERROR_GLIBC("calloc");
			return -1;
		}

		/* Push the destination onto insn's singly-linked alts list. */
		alt->insn = dest_insn;
		alt->next = insn->alts;
		insn->alts = alt;
next:
		prev_offset = reloc_offset(reloc);
	}

	/* Not a single valid entry was found: the table lookup failed. */
	if (!prev_offset) {
		ERROR_INSN(insn, "can't find switch jump table");
		return -1;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * find_jump_table() - Given a dynamic jump, find the switch jump table
 * associated with it.
 */
static void find_jump_table(struct objtool_file *file, struct symbol *func,
			    struct instruction *insn)
{
	struct reloc *table_reloc;
	struct instruction *dest_insn, *orig_insn = insn;
	unsigned long table_size;
	unsigned long sym_offset;

	/*
	 * Backward search using the @first_jump_src links, these help avoid
	 * much of the 'in between' code. Which avoids us getting confused by
	 * it.
	 */
	for (;
	     insn && insn_func(insn) && insn_func(insn)->pfunc == func;
	     insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) {

		/* An earlier dynamic jump ends the backward search range. */
		if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
			break;

		/* allow small jumps within the range */
		if (insn->type == INSN_JUMP_UNCONDITIONAL &&
		    insn->jump_dest &&
		    (insn->jump_dest->offset <= insn->offset ||
		     insn->jump_dest->offset > orig_insn->offset))
			break;

		/* Ask the arch whether this insn references a switch table. */
		table_reloc = arch_find_switch_table(file, insn, &table_size);
		if (!table_reloc)
			continue;

		sym_offset = table_reloc->sym->offset + reloc_addend(table_reloc);

		/*
		 * The table's first entry must point back into this function
		 * (same pfunc group); otherwise the table belongs elsewhere.
		 */
		dest_insn = find_insn(file, table_reloc->sym->sec, sym_offset);
		if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
			continue;

		/* Record the table on the original dynamic jump. */
		set_jump_table(table_reloc);
		orig_insn->_jump_table = table_reloc;
		orig_insn->_jump_table_size = table_size;

		break;
	}
}
|
|
|
|
|
objtool: Support repeated uses of the same C jump table
This fixes objtool for both a GCC issue and a Clang issue:
1) GCC issue:
kernel/bpf/core.o: warning: objtool: ___bpf_prog_run()+0x8d5: sibling call from callable instruction with modified stack frame
With CONFIG_RETPOLINE=n, GCC is doing the following optimization in
___bpf_prog_run().
Before:
select_insn:
jmp *jumptable(,%rax,8)
...
ALU64_ADD_X:
...
jmp select_insn
ALU_ADD_X:
...
jmp select_insn
After:
select_insn:
jmp *jumptable(, %rax, 8)
...
ALU64_ADD_X:
...
jmp *jumptable(, %rax, 8)
ALU_ADD_X:
...
jmp *jumptable(, %rax, 8)
This confuses objtool. It has never seen multiple indirect jump
sites which use the same jump table.
For GCC switch tables, the only way of detecting the size of a table
is by continuing to scan for more tables. The size of the previous
table can only be determined after another switch table is found, or
when the scan reaches the end of the function.
That logic was reused for C jump tables, and was based on the
assumption that each jump table only has a single jump site. The
above optimization breaks that assumption.
2) Clang issue:
drivers/usb/misc/sisusbvga/sisusb.o: warning: objtool: sisusb_write_mem_bulk()+0x588: can't find switch jump table
With clang 9, code can be generated where a function contains two
indirect jump instructions which use the same switch table.
The fix is the same for both issues: split the jump table parsing into
two passes.
In the first pass, locate the heads of all switch tables for the
function and mark their locations.
In the second pass, parse the switch tables and add them.
Fixes: e55a73251da3 ("bpf: Fix ORC unwinding in non-JIT BPF code")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/e995befaada9d4d8b2cf788ff3f566ba900d2b4d.1563413318.git.jpoimboe@redhat.com
Co-developed-by: Josh Poimboeuf <jpoimboe@redhat.com>
2019-07-18 01:36:54 +00:00
|
|
|
/*
 * First pass: Mark the head of each jump table so that in the next pass,
 * we know when a given jump table ends and the next one starts.
 */
static void mark_func_jump_tables(struct objtool_file *file,
				  struct symbol *func)
{
	struct instruction *insn, *last = NULL;

	func_for_each_insn(file, func, insn) {
		if (!last)
			last = insn;

		/*
		 * Store back-pointers for unconditional forward jumps such
		 * that find_jump_table() can back-track using those and
		 * avoid some potentially confusing code.
		 */
		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
		    insn->offset > last->offset &&
		    insn->jump_dest->offset > insn->offset &&
		    !insn->jump_dest->first_jump_src) {

			insn->jump_dest->first_jump_src = insn;
			last = insn->jump_dest;
		}

		/* Only dynamic jumps can be switch-table dispatches. */
		if (insn->type != INSN_JUMP_DYNAMIC)
			continue;

		find_jump_table(file, func, insn);
	}
}
|
|
|
|
|
|
|
|
static int add_func_jump_tables(struct objtool_file *file,
|
|
|
|
struct symbol *func)
|
|
|
|
{
|
2025-03-24 21:55:51 +00:00
|
|
|
struct instruction *insn;
|
|
|
|
int ret;
|
objtool: Support repeated uses of the same C jump table
This fixes objtool for both a GCC issue and a Clang issue:
1) GCC issue:
kernel/bpf/core.o: warning: objtool: ___bpf_prog_run()+0x8d5: sibling call from callable instruction with modified stack frame
With CONFIG_RETPOLINE=n, GCC is doing the following optimization in
___bpf_prog_run().
Before:
select_insn:
jmp *jumptable(,%rax,8)
...
ALU64_ADD_X:
...
jmp select_insn
ALU_ADD_X:
...
jmp select_insn
After:
select_insn:
jmp *jumptable(, %rax, 8)
...
ALU64_ADD_X:
...
jmp *jumptable(, %rax, 8)
ALU_ADD_X:
...
jmp *jumptable(, %rax, 8)
This confuses objtool. It has never seen multiple indirect jump
sites which use the same jump table.
For GCC switch tables, the only way of detecting the size of a table
is by continuing to scan for more tables. The size of the previous
table can only be determined after another switch table is found, or
when the scan reaches the end of the function.
That logic was reused for C jump tables, and was based on the
assumption that each jump table only has a single jump site. The
above optimization breaks that assumption.
2) Clang issue:
drivers/usb/misc/sisusbvga/sisusb.o: warning: objtool: sisusb_write_mem_bulk()+0x588: can't find switch jump table
With clang 9, code can be generated where a function contains two
indirect jump instructions which use the same switch table.
The fix is the same for both issues: split the jump table parsing into
two passes.
In the first pass, locate the heads of all switch tables for the
function and mark their locations.
In the second pass, parse the switch tables and add them.
Fixes: e55a73251da3 ("bpf: Fix ORC unwinding in non-JIT BPF code")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/e995befaada9d4d8b2cf788ff3f566ba900d2b4d.1563413318.git.jpoimboe@redhat.com
Co-developed-by: Josh Poimboeuf <jpoimboe@redhat.com>
2019-07-18 01:36:54 +00:00
|
|
|
|
2020-03-10 17:27:24 +00:00
|
|
|
func_for_each_insn(file, func, insn) {
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
if (!insn_jump_table(insn))
|
objtool: Support repeated uses of the same C jump table
This fixes objtool for both a GCC issue and a Clang issue:
1) GCC issue:
kernel/bpf/core.o: warning: objtool: ___bpf_prog_run()+0x8d5: sibling call from callable instruction with modified stack frame
With CONFIG_RETPOLINE=n, GCC is doing the following optimization in
___bpf_prog_run().
Before:
select_insn:
jmp *jumptable(,%rax,8)
...
ALU64_ADD_X:
...
jmp select_insn
ALU_ADD_X:
...
jmp select_insn
After:
select_insn:
jmp *jumptable(, %rax, 8)
...
ALU64_ADD_X:
...
jmp *jumptable(, %rax, 8)
ALU_ADD_X:
...
jmp *jumptable(, %rax, 8)
This confuses objtool. It has never seen multiple indirect jump
sites which use the same jump table.
For GCC switch tables, the only way of detecting the size of a table
is by continuing to scan for more tables. The size of the previous
table can only be determined after another switch table is found, or
when the scan reaches the end of the function.
That logic was reused for C jump tables, and was based on the
assumption that each jump table only has a single jump site. The
above optimization breaks that assumption.
2) Clang issue:
drivers/usb/misc/sisusbvga/sisusb.o: warning: objtool: sisusb_write_mem_bulk()+0x588: can't find switch jump table
With clang 9, code can be generated where a function contains two
indirect jump instructions which use the same switch table.
The fix is the same for both issues: split the jump table parsing into
two passes.
In the first pass, locate the heads of all switch tables for the
function and mark their locations.
In the second pass, parse the switch tables and add them.
Fixes: e55a73251da3 ("bpf: Fix ORC unwinding in non-JIT BPF code")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/e995befaada9d4d8b2cf788ff3f566ba900d2b4d.1563413318.git.jpoimboe@redhat.com
Co-developed-by: Josh Poimboeuf <jpoimboe@redhat.com>
2019-07-18 01:36:54 +00:00
|
|
|
continue;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2025-03-24 21:55:51 +00:00
|
|
|
ret = add_jump_table(file, insn);
|
2017-06-28 15:11:05 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2025-03-24 21:55:51 +00:00
|
|
|
return 0;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For some switch statements, gcc generates a jump table in the .rodata
|
|
|
|
* section which contains a list of addresses within the function to jump to.
|
|
|
|
* This finds these jump tables and adds them to the insn->alts lists.
|
|
|
|
*/
|
2019-07-18 01:36:53 +00:00
|
|
|
static int add_jump_table_alts(struct objtool_file *file)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
|
|
|
struct symbol *func;
|
|
|
|
int ret;
|
|
|
|
|
2018-09-07 13:12:01 +00:00
|
|
|
if (!file->rodata)
|
2017-06-28 15:11:05 +00:00
|
|
|
return 0;
|
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
for_each_sym(file, func) {
|
|
|
|
if (func->type != STT_FUNC)
|
|
|
|
continue;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
mark_func_jump_tables(file, func);
|
|
|
|
ret = add_func_jump_tables(file, func);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-01-21 21:29:24 +00:00
|
|
|
static void set_func_state(struct cfi_state *state)
|
|
|
|
{
|
|
|
|
state->cfa = initial_func_cfi.cfa;
|
|
|
|
memcpy(&state->regs, &initial_func_cfi.regs,
|
|
|
|
CFI_NUM_REGS * sizeof(struct cfi_reg));
|
|
|
|
state->stack_size = initial_func_cfi.cfa.offset;
|
2023-03-01 15:13:12 +00:00
|
|
|
state->type = UNWIND_HINT_TYPE_CALL;
|
2021-01-21 21:29:24 +00:00
|
|
|
}
|
|
|
|
|
2017-07-11 15:33:43 +00:00
|
|
|
/*
 * Parse the .discard.unwind_hints section and apply each unwind hint to
 * the instruction it annotates, resolving the hinted CFI state.
 */
static int read_unwind_hints(struct objtool_file *file)
{
	struct cfi_state cfi = init_cfi;
	struct section *sec;
	struct unwind_hint *hint;
	struct instruction *insn;
	struct reloc *reloc;
	unsigned long offset;
	int i;

	sec = find_section_by_name(file->elf, ".discard.unwind_hints");
	if (!sec)
		return 0;

	if (!sec->rsec) {
		ERROR("missing .rela.discard.unwind_hints section");
		return -1;
	}

	/* The section must be an exact array of struct unwind_hint. */
	if (sec->sh.sh_size % sizeof(struct unwind_hint)) {
		ERROR("struct unwind_hint size mismatch");
		return -1;
	}

	file->hints = true;

	for (i = 0; i < sec->sh.sh_size / sizeof(struct unwind_hint); i++) {
		hint = (struct unwind_hint *)sec->data->d_buf + i;

		/* Each hint entry has a reloc pointing at the annotated insn. */
		reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint));
		if (!reloc) {
			ERROR("can't find reloc for unwind_hints[%d]", i);
			return -1;
		}

		/* Resolve the reloc to an offset within the target section. */
		if (reloc->sym->type == STT_SECTION) {
			offset = reloc_addend(reloc);
		} else if (reloc->sym->local_label) {
			offset = reloc->sym->offset;
		} else {
			ERROR("unexpected relocation symbol type in %s", sec->rsec->name);
			return -1;
		}

		insn = find_insn(file, reloc->sym->sec, offset);
		if (!insn) {
			ERROR("can't find insn for unwind_hints[%d]", i);
			return -1;
		}

		insn->hint = true;

		/* Force-undefined hints pin a fixed CFI and need no decode. */
		if (hint->type == UNWIND_HINT_TYPE_UNDEFINED) {
			insn->cfi = &force_undefined_cfi;
			continue;
		}

		/* SAVE marks a save point; the hint flag itself is cleared. */
		if (hint->type == UNWIND_HINT_TYPE_SAVE) {
			insn->hint = false;
			insn->save = true;
			continue;
		}

		if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
			insn->restore = true;
			continue;
		}

		if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
			struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);

			/*
			 * With IBT, a global entry point carrying this hint
			 * must start with ENDBR (unless explicitly noendbr).
			 */
			if (sym && sym->bind == STB_GLOBAL) {
				if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
					ERROR_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR");
					return -1;
				}
			}
		}

		/* FUNC hints use the shared default function-entry CFI. */
		if (hint->type == UNWIND_HINT_TYPE_FUNC) {
			insn->cfi = &func_cfi;
			continue;
		}

		/* Start from the insn's current CFI, if already known. */
		if (insn->cfi)
			cfi = *(insn->cfi);

		if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) {
			ERROR_INSN(insn, "unsupported unwind_hint sp base reg %d", hint->sp_reg);
			return -1;
		}

		/* Hint fields are stored in target endianness. */
		cfi.cfa.offset = bswap_if_needed(file->elf, hint->sp_offset);
		cfi.type = hint->type;
		cfi.signal = hint->signal;

		/* Deduplicate via the global CFI hash. */
		insn->cfi = cfi_hash_find_or_add(&cfi);
	}

	return 0;
}
|
|
|
|
|
2024-11-28 09:38:58 +00:00
|
|
|
static int read_annotate(struct objtool_file *file,
|
|
|
|
int (*func)(struct objtool_file *file, int type, struct instruction *insn))
|
2022-03-08 15:30:52 +00:00
|
|
|
{
|
2024-11-28 09:38:52 +00:00
|
|
|
struct section *sec;
|
2022-03-08 15:30:52 +00:00
|
|
|
struct instruction *insn;
|
|
|
|
struct reloc *reloc;
|
2024-11-28 09:39:05 +00:00
|
|
|
uint64_t offset;
|
2024-11-28 09:38:54 +00:00
|
|
|
int type, ret;
|
2022-03-08 15:30:52 +00:00
|
|
|
|
2024-11-28 09:38:52 +00:00
|
|
|
sec = find_section_by_name(file->elf, ".discard.annotate_insn");
|
|
|
|
if (!sec)
|
2022-03-08 15:30:52 +00:00
|
|
|
return 0;
|
|
|
|
|
2024-11-28 09:38:52 +00:00
|
|
|
if (!sec->rsec)
|
|
|
|
return 0;
|
2022-03-08 15:30:52 +00:00
|
|
|
|
2024-11-28 09:38:52 +00:00
|
|
|
if (sec->sh.sh_entsize != 8) {
|
|
|
|
static bool warned = false;
|
2025-02-06 10:12:08 +00:00
|
|
|
if (!warned && opts.verbose) {
|
2024-11-28 09:38:52 +00:00
|
|
|
WARN("%s: dodgy linker, sh_entsize != 8", sec->name);
|
|
|
|
warned = true;
|
|
|
|
}
|
|
|
|
sec->sh.sh_entsize = 8;
|
2022-03-08 15:30:52 +00:00
|
|
|
}
|
|
|
|
|
2024-11-28 09:38:52 +00:00
|
|
|
for_each_reloc(sec->rsec, reloc) {
|
|
|
|
type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4);
|
2025-06-30 13:12:30 +00:00
|
|
|
type = bswap_if_needed(file->elf, type);
|
2018-01-16 09:24:06 +00:00
|
|
|
|
2024-11-28 09:39:05 +00:00
|
|
|
offset = reloc->sym->offset + reloc_addend(reloc);
|
|
|
|
insn = find_insn(file, reloc->sym->sec, offset);
|
2018-01-16 09:24:06 +00:00
|
|
|
|
|
|
|
if (!insn) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type);
|
2018-01-16 09:24:06 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2024-11-28 09:38:58 +00:00
|
|
|
ret = func(file, type, insn);
|
2024-11-28 09:38:54 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2018-01-16 09:24:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn)
|
2020-03-10 17:57:41 +00:00
|
|
|
{
|
2024-11-28 09:38:59 +00:00
|
|
|
switch (type) {
|
2025-03-24 21:55:54 +00:00
|
|
|
|
|
|
|
/* Must be before add_special_section_alts() */
|
2024-11-28 09:38:59 +00:00
|
|
|
case ANNOTYPE_IGNORE_ALTS:
|
|
|
|
insn->ignore_alts = true;
|
|
|
|
break;
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
/*
|
|
|
|
* Must be before read_unwind_hints() since that needs insn->noendbr.
|
|
|
|
*/
|
|
|
|
case ANNOTYPE_NOENDBR:
|
|
|
|
insn->noendbr = 1;
|
|
|
|
break;
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2024-11-28 09:38:54 +00:00
|
|
|
return 0;
|
2022-03-08 15:30:52 +00:00
|
|
|
}
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2024-11-28 09:38:58 +00:00
|
|
|
static int __annotate_ifc(struct objtool_file *file, int type, struct instruction *insn)
|
|
|
|
{
|
|
|
|
unsigned long dest_off;
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2024-11-28 09:38:58 +00:00
|
|
|
if (type != ANNOTYPE_INTRA_FUNCTION_CALL)
|
2020-03-10 17:57:41 +00:00
|
|
|
return 0;
|
|
|
|
|
2024-11-28 09:38:58 +00:00
|
|
|
if (insn->type != INSN_CALL) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "intra_function_call not a direct call");
|
2024-11-28 09:38:58 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2024-11-28 09:38:58 +00:00
|
|
|
/*
|
|
|
|
* Treat intra-function CALLs as JMPs, but with a stack_op.
|
|
|
|
* See add_call_destinations(), which strips stack_ops from
|
|
|
|
* normal CALLs.
|
|
|
|
*/
|
|
|
|
insn->type = INSN_JUMP_UNCONDITIONAL;
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2024-11-28 09:38:58 +00:00
|
|
|
dest_off = arch_jump_destination(insn);
|
|
|
|
insn->jump_dest = find_insn(file, insn->sec, dest_off);
|
|
|
|
if (!insn->jump_dest) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "can't find call dest at %s+0x%lx",
|
|
|
|
insn->sec->name, dest_off);
|
2024-11-28 09:38:58 +00:00
|
|
|
return -1;
|
2020-03-10 17:57:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn)
|
2023-03-01 15:13:11 +00:00
|
|
|
{
|
2024-11-28 09:38:59 +00:00
|
|
|
switch (type) {
|
2024-11-28 09:39:07 +00:00
|
|
|
case ANNOTYPE_NOENDBR:
|
|
|
|
/* early */
|
|
|
|
break;
|
2023-03-01 15:13:11 +00:00
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
case ANNOTYPE_RETPOLINE_SAFE:
|
|
|
|
if (insn->type != INSN_JUMP_DYNAMIC &&
|
|
|
|
insn->type != INSN_CALL_DYNAMIC &&
|
|
|
|
insn->type != INSN_RETURN &&
|
|
|
|
insn->type != INSN_NOP) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop");
|
2023-03-01 15:13:11 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
insn->retpoline_safe = true;
|
|
|
|
break;
|
2020-04-14 10:36:12 +00:00
|
|
|
|
2024-11-28 09:38:55 +00:00
|
|
|
case ANNOTYPE_INSTR_BEGIN:
|
|
|
|
insn->instr++;
|
|
|
|
break;
|
2020-04-14 10:36:12 +00:00
|
|
|
|
2024-11-28 09:38:55 +00:00
|
|
|
case ANNOTYPE_INSTR_END:
|
2020-03-10 17:57:41 +00:00
|
|
|
insn->instr--;
|
2024-11-28 09:38:55 +00:00
|
|
|
break;
|
2020-04-14 10:36:12 +00:00
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
case ANNOTYPE_UNRET_BEGIN:
|
|
|
|
insn->unret = 1;
|
|
|
|
break;
|
2020-04-14 10:36:12 +00:00
|
|
|
|
2024-11-28 09:39:07 +00:00
|
|
|
case ANNOTYPE_IGNORE_ALTS:
|
|
|
|
/* early */
|
|
|
|
break;
|
2020-04-14 10:36:12 +00:00
|
|
|
|
2024-11-28 09:39:07 +00:00
|
|
|
case ANNOTYPE_INTRA_FUNCTION_CALL:
|
|
|
|
/* ifc */
|
|
|
|
break;
|
2020-04-14 10:36:12 +00:00
|
|
|
|
2024-11-28 09:39:05 +00:00
|
|
|
case ANNOTYPE_REACHABLE:
|
|
|
|
insn->dead_end = false;
|
|
|
|
break;
|
2020-04-14 10:36:12 +00:00
|
|
|
|
2024-11-28 09:38:55 +00:00
|
|
|
default:
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_INSN(insn, "Unknown annotation type: %d", type);
|
2025-03-24 21:55:59 +00:00
|
|
|
return -1;
|
2020-04-14 10:36:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-11-30 11:44:31 +00:00
|
|
|
/*
 * Return true if name matches an instrumentation function, where calls to that
 * function from noinstr code can safely be removed, but compilers won't do so.
 */
static bool is_profiling_func(const char *name)
{
	/* KCOV: many compilers cannot disable it with a function attribute. */
	if (strncmp(name, "__sanitizer_cov_", 16) == 0)
		return true;

	/*
	 * KCSAN: some compilers keep __tsan_func_entry/exit and
	 * __tsan_atomic_signal_fence even under __no_sanitize_thread.
	 * This can go away once the minimum Clang version is 14.0.
	 */
	return strncmp(name, "__tsan_func_", 12) == 0 ||
	       strcmp(name, "__tsan_atomic_signal_fence") == 0;
}
|
|
|
|
|
2021-10-26 12:01:33 +00:00
|
|
|
static int classify_symbols(struct objtool_file *file)
|
2020-08-18 13:57:45 +00:00
|
|
|
{
|
|
|
|
struct symbol *func;
|
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
for_each_sym(file, func) {
|
2024-03-11 14:23:47 +00:00
|
|
|
if (func->type == STT_NOTYPE && strstarts(func->name, ".L"))
|
|
|
|
func->local_label = true;
|
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (func->bind != STB_GLOBAL)
|
|
|
|
continue;
|
2021-10-26 12:01:33 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
|
|
|
|
strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
|
|
|
|
func->static_call_tramp = true;
|
2021-10-26 12:01:33 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (arch_is_retpoline(func))
|
|
|
|
func->retpoline_thunk = true;
|
2021-10-26 12:01:33 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (arch_is_rethunk(func))
|
|
|
|
func->return_thunk = true;
|
2022-06-14 21:15:38 +00:00
|
|
|
|
2023-08-14 11:44:29 +00:00
|
|
|
if (arch_is_embedded_insn(func))
|
|
|
|
func->embedded_insn = true;
|
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (arch_ftrace_match(func->name))
|
|
|
|
func->fentry = true;
|
2021-10-26 12:01:33 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
if (is_profiling_func(func->name))
|
|
|
|
func->profiling_func = true;
|
2020-08-18 13:57:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-09-07 13:12:01 +00:00
|
|
|
static void mark_rodata(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct section *sec;
|
|
|
|
bool found = false;
|
|
|
|
|
|
|
|
/*
|
2019-06-28 01:50:46 +00:00
|
|
|
* Search for the following rodata sections, each of which can
|
|
|
|
* potentially contain jump tables:
|
|
|
|
*
|
|
|
|
* - .rodata: can contain GCC switch tables
|
|
|
|
* - .rodata.<func>: same, if -fdata-sections is being used
|
objtool: Fix C jump table annotations for Clang
A C jump table (such as the one used by the BPF interpreter) is a const
global array of absolute code addresses, and this means that the actual
values in the table may not be known until the kernel is booted (e.g.,
when using KASLR or when the kernel VA space is sized dynamically).
When using PIE codegen, the compiler will default to placing such const
global objects in .data.rel.ro (which is annotated as writable), rather
than .rodata (which is annotated as read-only). As C jump tables are
explicitly emitted into .rodata, this used to result in warnings for
LoongArch builds (which uses PIE codegen for the entire kernel) like
Warning: setting incorrect section attributes for .rodata..c_jump_table
due to the fact that the explicitly specified .rodata section inherited
the read-write annotation that the compiler uses for such objects when
using PIE codegen.
This warning was suppressed by explicitly adding the read-only
annotation to the __attribute__((section(""))) string, by commit
c5b1184decc8 ("compiler.h: specify correct attribute for .rodata..c_jump_table")
Unfortunately, this hack does not work on Clang's integrated assembler,
which happily interprets the appended section type and permission
specifiers as part of the section name, which therefore no longer
matches the hard-coded pattern '.rodata..c_jump_table' that objtool
expects, causing it to emit a warning
kernel/bpf/core.o: warning: objtool: ___bpf_prog_run+0x20: sibling call from callable instruction with modified stack frame
Work around this, by emitting C jump tables into .data.rel.ro instead,
which is treated as .rodata by the linker script for all builds, not
just PIE based ones.
Fixes: c5b1184decc8 ("compiler.h: specify correct attribute for .rodata..c_jump_table")
Tested-by: Tiezhu Yang <yangtiezhu@loongson.cn> # on LoongArch
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250221135704.431269-6-ardb+git@google.com
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2025-02-21 13:57:07 +00:00
|
|
|
* - .data.rel.ro.c_jump_table: contains C annotated jump tables
|
2019-06-28 01:50:46 +00:00
|
|
|
*
|
|
|
|
* .rodata.str1.* sections are ignored; they don't contain jump tables.
|
2018-09-07 13:12:01 +00:00
|
|
|
*/
|
|
|
|
for_each_sec(file, sec) {
|
objtool: Fix C jump table annotations for Clang
A C jump table (such as the one used by the BPF interpreter) is a const
global array of absolute code addresses, and this means that the actual
values in the table may not be known until the kernel is booted (e.g.,
when using KASLR or when the kernel VA space is sized dynamically).
When using PIE codegen, the compiler will default to placing such const
global objects in .data.rel.ro (which is annotated as writable), rather
than .rodata (which is annotated as read-only). As C jump tables are
explicitly emitted into .rodata, this used to result in warnings for
LoongArch builds (which uses PIE codegen for the entire kernel) like
Warning: setting incorrect section attributes for .rodata..c_jump_table
due to the fact that the explicitly specified .rodata section inherited
the read-write annotation that the compiler uses for such objects when
using PIE codegen.
This warning was suppressed by explicitly adding the read-only
annotation to the __attribute__((section(""))) string, by commit
c5b1184decc8 ("compiler.h: specify correct attribute for .rodata..c_jump_table")
Unfortunately, this hack does not work on Clang's integrated assembler,
which happily interprets the appended section type and permission
specifiers as part of the section name, which therefore no longer
matches the hard-coded pattern '.rodata..c_jump_table' that objtool
expects, causing it to emit a warning
kernel/bpf/core.o: warning: objtool: ___bpf_prog_run+0x20: sibling call from callable instruction with modified stack frame
Work around this, by emitting C jump tables into .data.rel.ro instead,
which is treated as .rodata by the linker script for all builds, not
just PIE based ones.
Fixes: c5b1184decc8 ("compiler.h: specify correct attribute for .rodata..c_jump_table")
Tested-by: Tiezhu Yang <yangtiezhu@loongson.cn> # on LoongArch
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250221135704.431269-6-ardb+git@google.com
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
2025-02-21 13:57:07 +00:00
|
|
|
if ((!strncmp(sec->name, ".rodata", 7) &&
|
|
|
|
!strstr(sec->name, ".str1.")) ||
|
|
|
|
!strncmp(sec->name, ".data.rel.ro", 12)) {
|
2018-09-07 13:12:01 +00:00
|
|
|
sec->rodata = true;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
file->rodata = found;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
static int decode_sections(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2018-09-07 13:12:01 +00:00
|
|
|
mark_rodata(file);
|
|
|
|
|
2021-06-24 09:41:23 +00:00
|
|
|
ret = init_pv_ops(file);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2022-09-22 20:03:50 +00:00
|
|
|
/*
|
|
|
|
* Must be before add_{jump_call}_destination.
|
|
|
|
*/
|
|
|
|
ret = classify_symbols(file);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
ret = decode_instructions(file);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = add_ignores(file);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2019-02-25 11:50:09 +00:00
|
|
|
add_uaccess_safe(file);
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2024-11-28 09:38:59 +00:00
|
|
|
ret = read_annotate(file, __annotate_early);
|
2022-03-08 15:30:52 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2021-03-26 15:12:12 +00:00
|
|
|
/*
|
2022-04-11 23:10:31 +00:00
|
|
|
* Must be before add_jump_destinations(), which depends on 'func'
|
|
|
|
* being set for alternatives, to enable proper sibling call detection.
|
2021-03-26 15:12:12 +00:00
|
|
|
*/
|
2022-11-14 17:57:50 +00:00
|
|
|
if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
|
|
|
|
ret = add_special_section_alts(file);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2022-04-11 23:10:31 +00:00
|
|
|
ret = add_jump_destinations(file);
|
2017-06-28 15:11:05 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2021-03-26 15:12:05 +00:00
|
|
|
/*
|
|
|
|
* Must be before add_call_destination(); it changes INSN_CALL to
|
|
|
|
* INSN_JUMP.
|
|
|
|
*/
|
2024-11-28 09:38:58 +00:00
|
|
|
ret = read_annotate(file, __annotate_ifc);
|
2020-04-14 10:36:12 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2018-01-30 04:00:39 +00:00
|
|
|
ret = add_call_destinations(file);
|
2017-06-28 15:11:05 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2019-07-18 01:36:53 +00:00
|
|
|
ret = add_jump_table_alts(file);
|
2017-06-28 15:11:05 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2017-07-11 15:33:43 +00:00
|
|
|
ret = read_unwind_hints(file);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2024-11-28 09:39:05 +00:00
|
|
|
/*
|
|
|
|
* Must be after add_call_destinations() such that it can override
|
|
|
|
* dead_end_function() marks.
|
|
|
|
*/
|
2024-11-28 09:38:59 +00:00
|
|
|
ret = read_annotate(file, __annotate_late);
|
2023-03-01 15:13:11 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-08-16 11:59:21 +00:00
|
|
|
static bool is_special_call(struct instruction *insn)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
2023-08-16 11:59:21 +00:00
|
|
|
if (insn->type == INSN_CALL) {
|
|
|
|
struct symbol *dest = insn_call_dest(insn);
|
|
|
|
|
|
|
|
if (!dest)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (dest->fentry || dest->embedded_insn)
|
|
|
|
return true;
|
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-04-01 14:38:19 +00:00
|
|
|
static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
2020-03-25 13:04:45 +00:00
|
|
|
struct cfi_state *cfi = &state->cfi;
|
2017-06-28 15:11:07 +00:00
|
|
|
int i;
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
|
2020-04-01 14:38:19 +00:00
|
|
|
return true;
|
|
|
|
|
2021-01-21 21:29:24 +00:00
|
|
|
if (cfi->cfa.offset != initial_func_cfi.cfa.offset)
|
2017-06-28 15:11:07 +00:00
|
|
|
return true;
|
|
|
|
|
2021-01-21 21:29:24 +00:00
|
|
|
if (cfi->stack_size != initial_func_cfi.cfa.offset)
|
2020-04-01 14:38:19 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
for (i = 0; i < CFI_NUM_REGS; i++) {
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
|
|
|
|
cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
|
2017-06-28 15:11:07 +00:00
|
|
|
return true;
|
2020-04-01 14:38:19 +00:00
|
|
|
}
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-10-14 07:38:00 +00:00
|
|
|
static bool check_reg_frame_pos(const struct cfi_reg *reg,
|
|
|
|
int expected_offset)
|
|
|
|
{
|
|
|
|
return reg->base == CFI_CFA &&
|
|
|
|
reg->offset == expected_offset;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
static bool has_valid_stack_frame(struct insn_state *state)
|
|
|
|
{
|
2020-03-25 13:04:45 +00:00
|
|
|
struct cfi_state *cfi = &state->cfi;
|
|
|
|
|
2020-10-14 07:38:00 +00:00
|
|
|
if (cfi->cfa.base == CFI_BP &&
|
|
|
|
check_reg_frame_pos(&cfi->regs[CFI_BP], -cfi->cfa.offset) &&
|
|
|
|
check_reg_frame_pos(&cfi->regs[CFI_RA], -cfi->cfa.offset + 8))
|
2017-06-28 15:11:07 +00:00
|
|
|
return true;
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
|
2017-06-28 15:11:07 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
static int update_cfi_state_regs(struct instruction *insn,
|
|
|
|
struct cfi_state *cfi,
|
2020-03-27 15:28:47 +00:00
|
|
|
struct stack_op *op)
|
2017-07-11 15:33:42 +00:00
|
|
|
{
|
2020-03-25 13:04:45 +00:00
|
|
|
struct cfi_reg *cfa = &cfi->cfa;
|
2017-07-11 15:33:42 +00:00
|
|
|
|
2020-04-25 10:03:00 +00:00
|
|
|
if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
|
2017-07-11 15:33:42 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* push */
|
2019-02-25 11:50:09 +00:00
|
|
|
if (op->dest.type == OP_DEST_PUSH || op->dest.type == OP_DEST_PUSHF)
|
2017-07-11 15:33:42 +00:00
|
|
|
cfa->offset += 8;
|
|
|
|
|
|
|
|
/* pop */
|
2019-02-25 11:50:09 +00:00
|
|
|
if (op->src.type == OP_SRC_POP || op->src.type == OP_SRC_POPF)
|
2017-07-11 15:33:42 +00:00
|
|
|
cfa->offset -= 8;
|
|
|
|
|
|
|
|
/* add immediate to sp */
|
|
|
|
if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD &&
|
|
|
|
op->dest.reg == CFI_SP && op->src.reg == CFI_SP)
|
|
|
|
cfa->offset -= op->src.offset;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
static void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
if (arch_callee_saved_reg(reg) &&
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->regs[reg].base == CFI_UNDEFINED) {
|
|
|
|
cfi->regs[reg].base = base;
|
|
|
|
cfi->regs[reg].offset = offset;
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
static void restore_reg(struct cfi_state *cfi, unsigned char reg)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->regs[reg].base = initial_func_cfi.regs[reg].base;
|
|
|
|
cfi->regs[reg].offset = initial_func_cfi.regs[reg].offset;
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A note about DRAP stack alignment:
|
|
|
|
*
|
|
|
|
* GCC has the concept of a DRAP register, which is used to help keep track of
|
|
|
|
* the stack pointer when aligning the stack. r10 or r13 is used as the DRAP
|
|
|
|
* register. The typical DRAP pattern is:
|
|
|
|
*
|
|
|
|
* 4c 8d 54 24 08 lea 0x8(%rsp),%r10
|
|
|
|
* 48 83 e4 c0 and $0xffffffffffffffc0,%rsp
|
|
|
|
* 41 ff 72 f8 pushq -0x8(%r10)
|
|
|
|
* 55 push %rbp
|
|
|
|
* 48 89 e5 mov %rsp,%rbp
|
|
|
|
* (more pushes)
|
|
|
|
* 41 52 push %r10
|
|
|
|
* ...
|
|
|
|
* 41 5a pop %r10
|
|
|
|
* (more pops)
|
|
|
|
* 5d pop %rbp
|
|
|
|
* 49 8d 62 f8 lea -0x8(%r10),%rsp
|
|
|
|
* c3 retq
|
|
|
|
*
|
|
|
|
* There are some variations in the epilogues, like:
|
|
|
|
*
|
|
|
|
* 5b pop %rbx
|
|
|
|
* 41 5a pop %r10
|
|
|
|
* 41 5c pop %r12
|
|
|
|
* 41 5d pop %r13
|
|
|
|
* 41 5e pop %r14
|
|
|
|
* c9 leaveq
|
|
|
|
* 49 8d 62 f8 lea -0x8(%r10),%rsp
|
|
|
|
* c3 retq
|
|
|
|
*
|
|
|
|
* and:
|
|
|
|
*
|
|
|
|
* 4c 8b 55 e8 mov -0x18(%rbp),%r10
|
|
|
|
* 48 8b 5d e0 mov -0x20(%rbp),%rbx
|
|
|
|
* 4c 8b 65 f0 mov -0x10(%rbp),%r12
|
|
|
|
* 4c 8b 6d f8 mov -0x8(%rbp),%r13
|
|
|
|
* c9 leaveq
|
|
|
|
* 49 8d 62 f8 lea -0x8(%r10),%rsp
|
|
|
|
* c3 retq
|
|
|
|
*
|
|
|
|
* Sometimes r13 is used as the DRAP register, in which case it's saved and
|
|
|
|
* restored beforehand:
|
|
|
|
*
|
|
|
|
* 41 55 push %r13
|
|
|
|
* 4c 8d 6c 24 10 lea 0x10(%rsp),%r13
|
|
|
|
* 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
|
|
|
|
* ...
|
|
|
|
* 49 8d 65 f0 lea -0x10(%r13),%rsp
|
|
|
|
* 41 5d pop %r13
|
|
|
|
* c3 retq
|
|
|
|
*/
|
2021-02-11 12:03:28 +00:00
|
|
|
static int update_cfi_state(struct instruction *insn,
|
|
|
|
struct instruction *next_insn,
|
|
|
|
struct cfi_state *cfi, struct stack_op *op)
|
2017-06-28 15:11:07 +00:00
|
|
|
{
|
2020-03-25 13:04:45 +00:00
|
|
|
struct cfi_reg *cfa = &cfi->cfa;
|
|
|
|
struct cfi_reg *regs = cfi->regs;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
2023-06-05 16:12:21 +00:00
|
|
|
/* ignore UNWIND_HINT_UNDEFINED regions */
|
|
|
|
if (cfi->force_undefined)
|
|
|
|
return 0;
|
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
/* stack operations don't make sense with an undefined CFA */
|
|
|
|
if (cfa->base == CFI_UNDEFINED) {
|
2022-09-22 20:03:50 +00:00
|
|
|
if (insn_func(insn)) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "undefined stack state");
|
2025-03-24 21:55:59 +00:00
|
|
|
return 1;
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-09-04 15:30:27 +00:00
|
|
|
if (cfi->type == UNWIND_HINT_TYPE_REGS ||
|
|
|
|
cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL)
|
2020-03-25 13:04:45 +00:00
|
|
|
return update_cfi_state_regs(insn, cfi, op);
|
2017-07-11 15:33:42 +00:00
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
switch (op->dest.type) {
|
|
|
|
|
|
|
|
case OP_DEST_REG:
|
|
|
|
switch (op->src.type) {
|
|
|
|
|
|
|
|
case OP_SRC_REG:
|
2017-09-20 21:24:32 +00:00
|
|
|
if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
|
|
|
|
cfa->base == CFI_SP &&
|
2020-10-14 07:38:00 +00:00
|
|
|
check_reg_frame_pos(®s[CFI_BP], -cfa->offset)) {
|
2017-09-20 21:24:32 +00:00
|
|
|
|
|
|
|
/* mov %rsp, %rbp */
|
|
|
|
cfa->base = op->dest.reg;
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->bp_scratch = false;
|
2017-09-20 21:24:32 +00:00
|
|
|
}
|
2017-08-29 17:51:03 +00:00
|
|
|
|
2017-09-20 21:24:32 +00:00
|
|
|
else if (op->src.reg == CFI_SP &&
|
2020-03-25 13:04:45 +00:00
|
|
|
op->dest.reg == CFI_BP && cfi->drap) {
|
2017-08-29 17:51:03 +00:00
|
|
|
|
2017-09-20 21:24:32 +00:00
|
|
|
/* drap: mov %rsp, %rbp */
|
|
|
|
regs[CFI_BP].base = CFI_BP;
|
2020-03-25 13:04:45 +00:00
|
|
|
regs[CFI_BP].offset = -cfi->stack_size;
|
|
|
|
cfi->bp_scratch = false;
|
2017-09-20 21:24:32 +00:00
|
|
|
}
|
2017-08-29 17:51:03 +00:00
|
|
|
|
2017-09-20 21:24:32 +00:00
|
|
|
else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mov %rsp, %reg
|
|
|
|
*
|
|
|
|
* This is needed for the rare case where GCC
|
|
|
|
* does:
|
|
|
|
*
|
|
|
|
* mov %rsp, %rax
|
|
|
|
* ...
|
|
|
|
* mov %rax, %rsp
|
|
|
|
*/
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->vals[op->dest.reg].base = CFI_CFA;
|
|
|
|
cfi->vals[op->dest.reg].offset = -cfi->stack_size;
|
2017-08-29 17:51:03 +00:00
|
|
|
}
|
|
|
|
|
2018-03-22 18:00:37 +00:00
|
|
|
else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
|
2021-02-09 20:41:13 +00:00
|
|
|
(cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) {
|
2018-03-22 18:00:37 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* mov %rbp, %rsp
|
|
|
|
*
|
|
|
|
* Restore the original stack pointer (Clang).
|
|
|
|
*/
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->stack_size = -cfi->regs[CFI_BP].offset;
|
2018-03-22 18:00:37 +00:00
|
|
|
}
|
|
|
|
|
2017-08-29 17:51:03 +00:00
|
|
|
else if (op->dest.reg == cfa->base) {
|
|
|
|
|
|
|
|
/* mov %reg, %rsp */
|
|
|
|
if (cfa->base == CFI_SP &&
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->vals[op->src.reg].base == CFI_CFA) {
|
2017-08-29 17:51:03 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is needed for the rare case
|
|
|
|
* where GCC does something dumb like:
|
|
|
|
*
|
|
|
|
* lea 0x8(%rsp), %rcx
|
|
|
|
* ...
|
|
|
|
* mov %rcx, %rsp
|
|
|
|
*/
|
2020-03-25 13:04:45 +00:00
|
|
|
cfa->offset = -cfi->vals[op->src.reg].offset;
|
|
|
|
cfi->stack_size = cfa->offset;
|
2017-08-29 17:51:03 +00:00
|
|
|
|
2021-02-03 11:02:17 +00:00
|
|
|
} else if (cfa->base == CFI_SP &&
|
|
|
|
cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
|
|
|
|
cfi->vals[op->src.reg].offset == cfa->offset) {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Stack swizzle:
|
|
|
|
*
|
|
|
|
* 1: mov %rsp, (%[tos])
|
|
|
|
* 2: mov %[tos], %rsp
|
|
|
|
* ...
|
|
|
|
* 3: pop %rsp
|
|
|
|
*
|
|
|
|
* Where:
|
|
|
|
*
|
|
|
|
* 1 - places a pointer to the previous
|
|
|
|
* stack at the Top-of-Stack of the
|
|
|
|
* new stack.
|
|
|
|
*
|
|
|
|
* 2 - switches to the new stack.
|
|
|
|
*
|
|
|
|
* 3 - pops the Top-of-Stack to restore
|
|
|
|
* the original stack.
|
|
|
|
*
|
|
|
|
* Note: we set base to SP_INDIRECT
|
|
|
|
* here and preserve offset. Therefore
|
|
|
|
* when the unwinder reaches ToS it
|
|
|
|
* will dereference SP and then add the
|
|
|
|
* offset to find the next frame, IOW:
|
|
|
|
* (%rsp) + offset.
|
|
|
|
*/
|
|
|
|
cfa->base = CFI_SP_INDIRECT;
|
|
|
|
|
2017-08-29 17:51:03 +00:00
|
|
|
} else {
|
|
|
|
cfa->base = CFI_UNDEFINED;
|
|
|
|
cfa->offset = 0;
|
|
|
|
}
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
2021-02-18 16:14:10 +00:00
|
|
|
else if (op->dest.reg == CFI_SP &&
|
|
|
|
cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
|
|
|
|
cfi->vals[op->src.reg].offset == cfa->offset) {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The same stack swizzle case 2) as above. But
|
|
|
|
* because we can't change cfa->base, case 3)
|
|
|
|
* will become a regular POP. Pretend we're a
|
|
|
|
* PUSH so things don't go unbalanced.
|
|
|
|
*/
|
|
|
|
cfi->stack_size += 8;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case OP_SRC_ADD:
|
|
|
|
if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
|
|
|
|
|
|
|
|
/* add imm, %rsp */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->stack_size -= op->src.offset;
|
2017-06-28 15:11:07 +00:00
|
|
|
if (cfa->base == CFI_SP)
|
|
|
|
cfa->offset -= op->src.offset;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
objtool: Handle frame pointer related instructions
After commit a0f7085f6a63 ("LoongArch: Add RANDOMIZE_KSTACK_OFFSET
support"), there are three new instructions "addi.d $fp, $sp, 32",
"sub.d $sp, $sp, $t0" and "addi.d $sp, $fp, -32" for the secondary
stack in do_syscall(), then there is a objtool warning "return with
modified stack frame" and no handle_syscall() which is the previous
frame of do_syscall() in the call trace when executing the command
"echo l > /proc/sysrq-trigger".
objdump shows something like this:
0000000000000000 <do_syscall>:
0: 02ff8063 addi.d $sp, $sp, -32
4: 29c04076 st.d $fp, $sp, 16
8: 29c02077 st.d $s0, $sp, 8
c: 29c06061 st.d $ra, $sp, 24
10: 02c08076 addi.d $fp, $sp, 32
...
74: 0011b063 sub.d $sp, $sp, $t0
...
a8: 4c000181 jirl $ra, $t0, 0
...
dc: 02ff82c3 addi.d $sp, $fp, -32
e0: 28c06061 ld.d $ra, $sp, 24
e4: 28c04076 ld.d $fp, $sp, 16
e8: 28c02077 ld.d $s0, $sp, 8
ec: 02c08063 addi.d $sp, $sp, 32
f0: 4c000020 jirl $zero, $ra, 0
The instruction "sub.d $sp, $sp, $t0" changes the stack bottom and the
new stack size is a random value, in order to find the return address of
do_syscall() which is stored in the original stack frame after executing
"jirl $ra, $t0, 0", it should use fp which points to the original stack
top.
At the beginning, the thought is tended to decode the secondary stack
instruction "sub.d $sp, $sp, $t0" and set it as a label, then check this
label for the two frame pointer instructions to change the cfa base and
cfa offset during the period of secondary stack in update_cfi_state().
This is valid for GCC but invalid for Clang due to there are different
secondary stack instructions for ClangBuiltLinux on LoongArch, something
like this:
0000000000000000 <do_syscall>:
...
88: 00119064 sub.d $a0, $sp, $a0
8c: 00150083 or $sp, $a0, $zero
...
Actually, it equals to a single instruction "sub.d $sp, $sp, $a0", but
there is no proper condition to check it as a label like GCC, and so the
beginning thought is not a good way.
Essentially, there are two special frame pointer instructions which are
"addi.d $fp, $sp, imm" and "addi.d $sp, $fp, imm", the first one points
fp to the original stack top and the second one restores the original
stack bottom from fp.
Based on the above analysis, in order to avoid adding an arch-specific
update_cfi_state(), we just add a member "frame_pointer" in the "struct
symbol" as a label to avoid affecting the current normal case, then set
it as true only if there is "addi.d $sp, $fp, imm". The last is to check
this label for the two frame pointer instructions to change the cfa base
and cfa offset in update_cfi_state().
Tested with the following two configs:
(1) CONFIG_RANDOMIZE_KSTACK_OFFSET=y &&
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=n
(2) CONFIG_RANDOMIZE_KSTACK_OFFSET=y &&
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
By the way, there is no effect for x86 with this patch, tested on the
x86 machine with Fedora 40 system.
Cc: stable@vger.kernel.org # 6.9+
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2024-09-17 14:23:09 +00:00
|
|
|
if (op->dest.reg == CFI_BP && op->src.reg == CFI_SP &&
|
|
|
|
insn->sym->frame_pointer) {
|
|
|
|
/* addi.d fp,sp,imm on LoongArch */
|
|
|
|
if (cfa->base == CFI_SP && cfa->offset == op->src.offset) {
|
|
|
|
cfa->base = CFI_BP;
|
|
|
|
cfa->offset = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2017-06-28 15:11:07 +00:00
|
|
|
|
objtool: Handle frame pointer related instructions
After commit a0f7085f6a63 ("LoongArch: Add RANDOMIZE_KSTACK_OFFSET
support"), there are three new instructions "addi.d $fp, $sp, 32",
"sub.d $sp, $sp, $t0" and "addi.d $sp, $fp, -32" for the secondary
stack in do_syscall(), then there is a objtool warning "return with
modified stack frame" and no handle_syscall() which is the previous
frame of do_syscall() in the call trace when executing the command
"echo l > /proc/sysrq-trigger".
objdump shows something like this:
0000000000000000 <do_syscall>:
0: 02ff8063 addi.d $sp, $sp, -32
4: 29c04076 st.d $fp, $sp, 16
8: 29c02077 st.d $s0, $sp, 8
c: 29c06061 st.d $ra, $sp, 24
10: 02c08076 addi.d $fp, $sp, 32
...
74: 0011b063 sub.d $sp, $sp, $t0
...
a8: 4c000181 jirl $ra, $t0, 0
...
dc: 02ff82c3 addi.d $sp, $fp, -32
e0: 28c06061 ld.d $ra, $sp, 24
e4: 28c04076 ld.d $fp, $sp, 16
e8: 28c02077 ld.d $s0, $sp, 8
ec: 02c08063 addi.d $sp, $sp, 32
f0: 4c000020 jirl $zero, $ra, 0
The instruction "sub.d $sp, $sp, $t0" changes the stack bottom and the
new stack size is a random value, in order to find the return address of
do_syscall() which is stored in the original stack frame after executing
"jirl $ra, $t0, 0", it should use fp which points to the original stack
top.
At the beginning, the thought is tended to decode the secondary stack
instruction "sub.d $sp, $sp, $t0" and set it as a label, then check this
label for the two frame pointer instructions to change the cfa base and
cfa offset during the period of secondary stack in update_cfi_state().
This is valid for GCC but invalid for Clang due to there are different
secondary stack instructions for ClangBuiltLinux on LoongArch, something
like this:
0000000000000000 <do_syscall>:
...
88: 00119064 sub.d $a0, $sp, $a0
8c: 00150083 or $sp, $a0, $zero
...
Actually, it equals to a single instruction "sub.d $sp, $sp, $a0", but
there is no proper condition to check it as a label like GCC, and so the
beginning thought is not a good way.
Essentially, there are two special frame pointer instructions which are
"addi.d $fp, $sp, imm" and "addi.d $sp, $fp, imm", the first one points
fp to the original stack top and the second one restores the original
stack bottom from fp.
Based on the above analysis, in order to avoid adding an arch-specific
update_cfi_state(), we just add a member "frame_pointer" in the "struct
symbol" as a label to avoid affecting the current normal case, then set
it as true only if there is "addi.d $sp, $fp, imm". The last is to check
this label for the two frame pointer instructions to change the cfa base
and cfa offset in update_cfi_state().
Tested with the following two configs:
(1) CONFIG_RANDOMIZE_KSTACK_OFFSET=y &&
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=n
(2) CONFIG_RANDOMIZE_KSTACK_OFFSET=y &&
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
By the way, there is no effect for x86 with this patch, tested on the
x86 machine with Fedora 40 system.
Cc: stable@vger.kernel.org # 6.9+
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2024-09-17 14:23:09 +00:00
|
|
|
if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
|
|
|
|
/* addi.d sp,fp,imm on LoongArch */
|
|
|
|
if (cfa->base == CFI_BP && cfa->offset == 0) {
|
|
|
|
if (insn->sym->frame_pointer) {
|
|
|
|
cfa->base = CFI_SP;
|
|
|
|
cfa->offset = -op->src.offset;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* lea disp(%rbp), %rsp */
|
|
|
|
cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset);
|
|
|
|
}
|
2017-06-28 15:11:07 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2017-08-29 17:51:03 +00:00
|
|
|
if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* drap: lea disp(%rsp), %drap */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->drap_reg = op->dest.reg;
|
2017-08-29 17:51:03 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* lea disp(%rsp), %reg
|
|
|
|
*
|
|
|
|
* This is needed for the rare case where GCC
|
|
|
|
* does something dumb like:
|
|
|
|
*
|
|
|
|
* lea 0x8(%rsp), %rcx
|
|
|
|
* ...
|
|
|
|
* mov %rcx, %rsp
|
|
|
|
*/
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->vals[op->dest.reg].base = CFI_CFA;
|
|
|
|
cfi->vals[op->dest.reg].offset = \
|
|
|
|
-cfi->stack_size + op->src.offset;
|
2017-08-29 17:51:03 +00:00
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap && op->dest.reg == CFI_SP &&
|
|
|
|
op->src.reg == cfi->drap_reg) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* drap: lea disp(%drap), %rsp */
|
|
|
|
cfa->base = CFI_SP;
|
2020-03-25 13:04:45 +00:00
|
|
|
cfa->offset = cfi->stack_size = -op->src.offset;
|
|
|
|
cfi->drap_reg = CFI_UNDEFINED;
|
|
|
|
cfi->drap = false;
|
2017-06-28 15:11:07 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2021-02-11 12:03:28 +00:00
|
|
|
if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "unsupported stack register modification");
|
2017-06-28 15:11:07 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP_SRC_AND:
|
|
|
|
if (op->dest.reg != CFI_SP ||
|
2020-03-25 13:04:45 +00:00
|
|
|
(cfi->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
|
|
|
|
(cfi->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "unsupported stack pointer realignment");
|
2017-06-28 15:11:07 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap_reg != CFI_UNDEFINED) {
|
2017-06-28 15:11:07 +00:00
|
|
|
/* drap: and imm, %rsp */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfa->base = cfi->drap_reg;
|
|
|
|
cfa->offset = cfi->stack_size = 0;
|
|
|
|
cfi->drap = true;
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Older versions of GCC (4.8ish) realign the stack
|
|
|
|
* without DRAP, with a frame pointer.
|
|
|
|
*/
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP_SRC_POP:
|
2019-02-25 11:50:09 +00:00
|
|
|
case OP_SRC_POPF:
|
2021-02-03 11:02:17 +00:00
|
|
|
if (op->dest.reg == CFI_SP && cfa->base == CFI_SP_INDIRECT) {
|
|
|
|
|
|
|
|
/* pop %rsp; # restore from a stack swizzle */
|
|
|
|
cfa->base = CFI_SP;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (!cfi->drap && op->dest.reg == cfa->base) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* pop %rbp */
|
|
|
|
cfa->base = CFI_SP;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap && cfa->base == CFI_BP_INDIRECT &&
|
|
|
|
op->dest.reg == cfi->drap_reg &&
|
|
|
|
cfi->drap_offset == -cfi->stack_size) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
/* drap: pop %drap */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfa->base = cfi->drap_reg;
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
cfa->offset = 0;
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->drap_offset = -1;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
2021-02-09 20:41:13 +00:00
|
|
|
} else if (cfi->stack_size == -regs[op->dest.reg].offset) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
/* pop %reg */
|
2020-03-25 13:04:45 +00:00
|
|
|
restore_reg(cfi, op->dest.reg);
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->stack_size -= 8;
|
2017-06-28 15:11:07 +00:00
|
|
|
if (cfa->base == CFI_SP)
|
|
|
|
cfa->offset -= 8;
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP_SRC_REG_INDIRECT:
|
2020-10-14 07:38:02 +00:00
|
|
|
if (!cfi->drap && op->dest.reg == cfa->base &&
|
|
|
|
op->dest.reg == CFI_BP) {
|
|
|
|
|
|
|
|
/* mov disp(%rsp), %rbp */
|
|
|
|
cfa->base = CFI_SP;
|
|
|
|
cfa->offset = cfi->stack_size;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap && op->src.reg == CFI_BP &&
|
|
|
|
op->src.offset == cfi->drap_offset) {
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
|
|
|
|
/* drap: mov disp(%rbp), %drap */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfa->base = cfi->drap_reg;
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
cfa->offset = 0;
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->drap_offset = -1;
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap && op->src.reg == CFI_BP &&
|
2017-06-28 15:11:07 +00:00
|
|
|
op->src.offset == regs[op->dest.reg].offset) {
|
|
|
|
|
|
|
|
/* drap: mov disp(%rbp), %reg */
|
2020-03-25 13:04:45 +00:00
|
|
|
restore_reg(cfi, op->dest.reg);
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
} else if (op->src.reg == cfa->base &&
|
|
|
|
op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
|
|
|
|
|
|
|
|
/* mov disp(%rbp), %reg */
|
|
|
|
/* mov disp(%rsp), %reg */
|
2020-03-25 13:04:45 +00:00
|
|
|
restore_reg(cfi, op->dest.reg);
|
2020-10-14 07:38:02 +00:00
|
|
|
|
|
|
|
} else if (op->src.reg == CFI_SP &&
|
|
|
|
op->src.offset == regs[op->dest.reg].offset + cfi->stack_size) {
|
|
|
|
|
|
|
|
/* mov disp(%rsp), %reg */
|
|
|
|
restore_reg(cfi, op->dest.reg);
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "unknown stack-related instruction");
|
2017-06-28 15:11:07 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP_DEST_PUSH:
|
2019-02-25 11:50:09 +00:00
|
|
|
case OP_DEST_PUSHF:
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->stack_size += 8;
|
2017-06-28 15:11:07 +00:00
|
|
|
if (cfa->base == CFI_SP)
|
|
|
|
cfa->offset += 8;
|
|
|
|
|
|
|
|
if (op->src.type != OP_SRC_REG)
|
|
|
|
break;
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap) {
|
|
|
|
if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* drap: push %drap */
|
|
|
|
cfa->base = CFI_BP_INDIRECT;
|
2020-03-25 13:04:45 +00:00
|
|
|
cfa->offset = -cfi->stack_size;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
/* save drap so we know when to restore it */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->drap_offset = -cfi->stack_size;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
} else if (op->src.reg == CFI_BP && cfa->base == cfi->drap_reg) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* drap: push %rbp */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->stack_size = 0;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
2020-09-15 07:53:16 +00:00
|
|
|
} else {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* drap: push %reg */
|
2020-03-25 13:04:45 +00:00
|
|
|
save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size);
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
/* push %reg */
|
2020-03-25 13:04:45 +00:00
|
|
|
save_reg(cfi, op->src.reg, CFI_CFA, -cfi->stack_size);
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* detect when asm code uses rbp as a scratch register */
|
2022-09-22 20:03:50 +00:00
|
|
|
if (opts.stackval && insn_func(insn) && op->src.reg == CFI_BP &&
|
2017-06-28 15:11:07 +00:00
|
|
|
cfa->base != CFI_BP)
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->bp_scratch = true;
|
2017-06-28 15:11:07 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case OP_DEST_REG_INDIRECT:
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (cfi->drap) {
|
|
|
|
if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* drap: mov %drap, disp(%rbp) */
|
|
|
|
cfa->base = CFI_BP_INDIRECT;
|
|
|
|
cfa->offset = op->dest.offset;
|
|
|
|
|
objtool: Track DRAP separately from callee-saved registers
When GCC realigns a function's stack, it sometimes uses %r13 as the DRAP
register, like:
push %r13
lea 0x10(%rsp), %r13
and $0xfffffffffffffff0, %rsp
pushq -0x8(%r13)
push %rbp
mov %rsp, %rbp
push %r13
...
mov -0x8(%rbp),%r13
leaveq
lea -0x10(%r13), %rsp
pop %r13
retq
Since %r13 was pushed onto the stack twice, its two stack locations need
to be stored separately. The first push of %r13 is its original value,
and the second push of %r13 is the caller's stack frame address.
Since %r13 is a callee-saved register, we need to track the stack
location of its original value separately from the DRAP register.
This fixes the following false positive warning:
lib/ubsan.o: warning: objtool: val_to_string.constprop.7()+0x97: leave instruction with modified stack frame
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/3da23a6d4c5b3c1e21fc2ccc21a73941b97ff20a.1502401017.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-10 21:37:26 +00:00
|
|
|
/* save drap offset so we know when to restore it */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->drap_offset = op->dest.offset;
|
2020-09-15 07:53:16 +00:00
|
|
|
} else {
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
/* drap: mov reg, disp(%rbp) */
|
2020-03-25 13:04:45 +00:00
|
|
|
save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset);
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
} else if (op->dest.reg == cfa->base) {
|
|
|
|
|
|
|
|
/* mov reg, disp(%rbp) */
|
|
|
|
/* mov reg, disp(%rsp) */
|
2020-03-25 13:04:45 +00:00
|
|
|
save_reg(cfi, op->src.reg, CFI_CFA,
|
|
|
|
op->dest.offset - cfi->cfa.offset);
|
2020-10-14 07:38:02 +00:00
|
|
|
|
|
|
|
} else if (op->dest.reg == CFI_SP) {
|
|
|
|
|
|
|
|
/* mov reg, disp(%rsp) */
|
|
|
|
save_reg(cfi, op->src.reg, CFI_CFA,
|
|
|
|
op->dest.offset - cfi->stack_size);
|
2021-02-03 11:02:17 +00:00
|
|
|
|
|
|
|
} else if (op->src.reg == CFI_SP && op->dest.offset == 0) {
|
|
|
|
|
|
|
|
/* mov %rsp, (%reg); # setup a stack swizzle. */
|
|
|
|
cfi->vals[op->dest.reg].base = CFI_SP_INDIRECT;
|
|
|
|
cfi->vals[op->dest.reg].offset = cfa->offset;
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP_DEST_MEM:
|
2019-02-25 11:50:09 +00:00
|
|
|
if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "unknown stack-related memory operation");
|
2017-06-28 15:11:07 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* pop mem */
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi->stack_size -= 8;
|
2017-06-28 15:11:07 +00:00
|
|
|
if (cfa->base == CFI_SP)
|
|
|
|
cfa->offset -= 8;
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "unknown stack-related instruction");
|
2017-06-28 15:11:07 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
/*
|
|
|
|
* The stack layouts of alternatives instructions can sometimes diverge when
|
|
|
|
* they have stack modifications. That's fine as long as the potential stack
|
|
|
|
* layouts don't conflict at any given potential instruction boundary.
|
|
|
|
*
|
|
|
|
* Flatten the CFIs of the different alternative code streams (both original
|
|
|
|
* and replacement) into a single shared CFI array which can be used to detect
|
|
|
|
* conflicts and nicely feed a linear array of ORC entries to the unwinder.
|
|
|
|
*/
|
|
|
|
static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn)
|
2020-03-27 15:28:47 +00:00
|
|
|
{
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
struct cfi_state **alt_cfi;
|
|
|
|
int group_off;
|
2020-03-27 15:28:47 +00:00
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
if (!insn->alt_group)
|
|
|
|
return 0;
|
2020-03-27 15:28:47 +00:00
|
|
|
|
2021-06-24 09:41:01 +00:00
|
|
|
if (!insn->cfi) {
|
|
|
|
WARN("CFI missing");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
alt_cfi = insn->alt_group->cfi;
|
|
|
|
group_off = insn->offset - insn->alt_group->first_insn->offset;
|
2020-03-27 15:28:47 +00:00
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
if (!alt_cfi[group_off]) {
|
2021-06-24 09:41:01 +00:00
|
|
|
alt_cfi[group_off] = insn->cfi;
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
} else {
|
2021-06-24 09:41:01 +00:00
|
|
|
if (cficmp(alt_cfi[group_off], insn->cfi)) {
|
2023-02-08 17:18:03 +00:00
|
|
|
struct alt_group *orig_group = insn->alt_group->orig_group ?: insn->alt_group;
|
|
|
|
struct instruction *orig = orig_group->first_insn;
|
2025-03-24 21:55:59 +00:00
|
|
|
WARN_INSN(orig, "stack layout conflict in alternatives: %s",
|
|
|
|
offstr(insn->sec, insn->offset));
|
2020-05-08 10:34:33 +00:00
|
|
|
return -1;
|
|
|
|
}
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-02-11 12:03:28 +00:00
|
|
|
static int handle_insn_ops(struct instruction *insn,
|
|
|
|
struct instruction *next_insn,
|
|
|
|
struct insn_state *state)
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
{
|
|
|
|
struct stack_op *op;
|
2025-03-24 21:55:59 +00:00
|
|
|
int ret;
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
|
objtool: Make instruction::stack_ops a single-linked list
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
unsigned int len; /* 64 4 */
enum insn_type type; /* 68 4 */
long unsigned int immediate; /* 72 8 */
u16 dead_end:1; /* 80: 0 2 */
u16 ignore:1; /* 80: 1 2 */
u16 ignore_alts:1; /* 80: 2 2 */
u16 hint:1; /* 80: 3 2 */
u16 save:1; /* 80: 4 2 */
u16 restore:1; /* 80: 5 2 */
u16 retpoline_safe:1; /* 80: 6 2 */
u16 noendbr:1; /* 80: 7 2 */
u16 entry:1; /* 80: 8 2 */
/* XXX 7 bits hole, try to pack */
s8 instr; /* 82 1 */
u8 visited; /* 83 1 */
/* XXX 4 bytes hole, try to pack */
struct alt_group * alt_group; /* 88 8 */
struct symbol * call_dest; /* 96 8 */
struct instruction * jump_dest; /* 104 8 */
struct instruction * first_jump_src; /* 112 8 */
struct reloc * jump_table; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
struct reloc * reloc; /* 128 8 */
struct list_head alts; /* 136 16 */
struct symbol * sym; /* 152 8 */
- struct list_head stack_ops; /* 160 16 */
- struct cfi_state * cfi; /* 176 8 */
+ struct stack_op * stack_ops; /* 160 8 */
+ struct cfi_state * cfi; /* 168 8 */
- /* size: 184, cachelines: 3, members: 29 */
- /* sum members: 178, holes: 1, sum holes: 4 */
+ /* size: 176, cachelines: 3, members: 29 */
+ /* sum members: 170, holes: 1, sum holes: 4 */
/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
- /* last cacheline: 56 bytes */
+ /* last cacheline: 48 bytes */
};
pre: 5:58.22 real, 226.69 user, 131.22 sys, 26221520 mem
post: 5:58.50 real, 229.64 user, 128.65 sys, 26221520 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.362196959@infradead.org
2023-02-08 17:17:58 +00:00
|
|
|
for (op = insn->stack_ops; op; op = op->next) {
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
ret = update_cfi_state(insn, next_insn, &state->cfi, op);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2020-05-08 10:34:33 +00:00
|
|
|
|
2025-03-24 21:55:58 +00:00
|
|
|
if (!opts.uaccess || !insn->alt_group)
|
2021-03-08 14:46:04 +00:00
|
|
|
continue;
|
|
|
|
|
2020-03-27 15:28:47 +00:00
|
|
|
if (op->dest.type == OP_DEST_PUSHF) {
|
|
|
|
if (!state->uaccess_stack) {
|
|
|
|
state->uaccess_stack = 1;
|
|
|
|
} else if (state->uaccess_stack >> 31) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "PUSHF stack exhausted");
|
2020-03-27 15:28:47 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
state->uaccess_stack <<= 1;
|
|
|
|
state->uaccess_stack |= state->uaccess;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (op->src.type == OP_SRC_POPF) {
|
|
|
|
if (state->uaccess_stack) {
|
|
|
|
state->uaccess = state->uaccess_stack & 1;
|
|
|
|
state->uaccess_stack >>= 1;
|
|
|
|
if (state->uaccess_stack == 1)
|
|
|
|
state->uaccess_stack = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
|
2017-06-28 15:11:07 +00:00
|
|
|
{
|
2021-06-24 09:41:01 +00:00
|
|
|
struct cfi_state *cfi1 = insn->cfi;
|
2017-06-28 15:11:07 +00:00
|
|
|
int i;
|
|
|
|
|
2021-06-24 09:41:01 +00:00
|
|
|
if (!cfi1) {
|
|
|
|
WARN("CFI missing");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) {
|
|
|
|
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi1->cfa.base, cfi1->cfa.offset,
|
|
|
|
cfi2->cfa.base, cfi2->cfa.offset);
|
2025-03-24 21:55:59 +00:00
|
|
|
return false;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) {
|
2017-06-28 15:11:07 +00:00
|
|
|
for (i = 0; i < CFI_NUM_REGS; i++) {
|
2025-03-24 21:55:59 +00:00
|
|
|
|
|
|
|
if (!memcmp(&cfi1->regs[i], &cfi2->regs[i], sizeof(struct cfi_reg)))
|
2017-06-28 15:11:07 +00:00
|
|
|
continue;
|
|
|
|
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
|
2020-03-25 13:04:45 +00:00
|
|
|
i, cfi1->regs[i].base, cfi1->regs[i].offset,
|
|
|
|
i, cfi2->regs[i].base, cfi2->regs[i].offset);
|
2017-06-28 15:11:07 +00:00
|
|
|
}
|
2025-03-24 21:55:59 +00:00
|
|
|
return false;
|
|
|
|
}
|
2017-06-28 15:11:07 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
if (cfi1->type != cfi2->type) {
|
2020-03-25 13:04:45 +00:00
|
|
|
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "stack state mismatch: type1=%d type2=%d",
|
|
|
|
cfi1->type, cfi2->type);
|
2025-03-24 21:55:59 +00:00
|
|
|
return false;
|
|
|
|
}
|
2020-03-25 13:04:45 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
if (cfi1->drap != cfi2->drap ||
|
2020-03-25 13:04:45 +00:00
|
|
|
(cfi1->drap && cfi1->drap_reg != cfi2->drap_reg) ||
|
|
|
|
(cfi1->drap && cfi1->drap_offset != cfi2->drap_offset)) {
|
2017-07-11 15:33:42 +00:00
|
|
|
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)",
|
2020-03-25 13:04:45 +00:00
|
|
|
cfi1->drap, cfi1->drap_reg, cfi1->drap_offset,
|
|
|
|
cfi2->drap, cfi2->drap_reg, cfi2->drap_offset);
|
2025-03-24 21:55:59 +00:00
|
|
|
return false;
|
|
|
|
}
|
2017-06-28 15:11:07 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
return true;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2019-02-25 11:50:09 +00:00
|
|
|
static inline bool func_uaccess_safe(struct symbol *func)
|
|
|
|
{
|
|
|
|
if (func)
|
2019-07-18 01:36:48 +00:00
|
|
|
return func->uaccess_safe;
|
2019-02-25 11:50:09 +00:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-07-18 01:36:52 +00:00
|
|
|
static inline const char *call_dest_name(struct instruction *insn)
|
2019-02-25 11:50:09 +00:00
|
|
|
{
|
objtool: Fix truncated string warning
On GCC 12, the build fails due to a possible truncated string:
check.c: In function 'validate_call':
check.c:2865:58: error: '%d' directive output may be truncated writing between 1 and 10 bytes into a region of size 9 [-Werror=format-truncation=]
2865 | snprintf(pvname, sizeof(pvname), "pv_ops[%d]", idx);
| ^~
In theory it's a valid bug:
static char pvname[16];
int idx;
...
idx = (rel->addend / sizeof(void *));
snprintf(pvname, sizeof(pvname), "pv_ops[%d]", idx);
There are only 7 chars for %d while it could take up to 9, so the
printed "pv_ops[%d]" string could get truncated.
In reality the bug should never happen, because pv_ops only has ~80
entries, so 7 chars for the integer is more than enough. Still, it's
worth fixing. Bump the buffer size by 2 bytes to silence the warning.
[ jpoimboe: changed size to 19; massaged changelog ]
Fixes: db2b0c5d7b6f ("objtool: Support pv_opsindirect calls for noinstr")
Reported-by: Adam Borowski <kilobyte@angband.pl>
Reported-by: Martin Liška <mliska@suse.cz>
Signed-off-by: Sergei Trofimovich <slyich@gmail.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20220120233748.2062559-1-slyich@gmail.com
2022-01-20 23:37:48 +00:00
|
|
|
static char pvname[19];
|
2023-05-30 17:20:55 +00:00
|
|
|
struct reloc *reloc;
|
2021-06-24 09:41:23 +00:00
|
|
|
int idx;
|
|
|
|
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
if (insn_call_dest(insn))
|
|
|
|
return insn_call_dest(insn)->name;
|
2019-02-25 11:50:09 +00:00
|
|
|
|
2023-05-30 17:20:55 +00:00
|
|
|
reloc = insn_reloc(NULL, insn);
|
|
|
|
if (reloc && !strcmp(reloc->sym->name, "pv_ops")) {
|
2023-05-30 17:21:08 +00:00
|
|
|
idx = (reloc_addend(reloc) / sizeof(void *));
|
2021-06-24 09:41:23 +00:00
|
|
|
snprintf(pvname, sizeof(pvname), "pv_ops[%d]", idx);
|
|
|
|
return pvname;
|
|
|
|
}
|
|
|
|
|
2019-02-25 11:50:09 +00:00
|
|
|
return "{dynamic}";
|
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:23 +00:00
|
|
|
static bool pv_call_dest(struct objtool_file *file, struct instruction *insn)
|
|
|
|
{
|
|
|
|
struct symbol *target;
|
2023-05-30 17:20:55 +00:00
|
|
|
struct reloc *reloc;
|
2021-06-24 09:41:23 +00:00
|
|
|
int idx;
|
|
|
|
|
2023-05-30 17:20:55 +00:00
|
|
|
reloc = insn_reloc(file, insn);
|
|
|
|
if (!reloc || strcmp(reloc->sym->name, "pv_ops"))
|
2021-06-24 09:41:23 +00:00
|
|
|
return false;
|
|
|
|
|
2023-05-30 17:21:08 +00:00
|
|
|
idx = (arch_dest_reloc_offset(reloc_addend(reloc)) / sizeof(void *));
|
2021-06-24 09:41:23 +00:00
|
|
|
|
|
|
|
if (file->pv_ops[idx].clean)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
file->pv_ops[idx].clean = true;
|
|
|
|
|
|
|
|
list_for_each_entry(target, &file->pv_ops[idx].targets, pv_target) {
|
|
|
|
if (!target->sec->noinstr) {
|
|
|
|
WARN("pv_ops[%d]: %s", idx, target->name);
|
|
|
|
file->pv_ops[idx].clean = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return file->pv_ops[idx].clean;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool noinstr_call_dest(struct objtool_file *file,
|
|
|
|
struct instruction *insn,
|
|
|
|
struct symbol *func)
|
2020-06-03 18:09:06 +00:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We can't deal with indirect function calls at present;
|
|
|
|
* assume they're instrumented.
|
|
|
|
*/
|
2021-06-24 09:41:23 +00:00
|
|
|
if (!func) {
|
|
|
|
if (file->pv_ops)
|
|
|
|
return pv_call_dest(file, insn);
|
|
|
|
|
2020-06-03 18:09:06 +00:00
|
|
|
return false;
|
2021-06-24 09:41:23 +00:00
|
|
|
}
|
2020-06-03 18:09:06 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the symbol is from a noinstr section; we good.
|
|
|
|
*/
|
|
|
|
if (func->sec->noinstr)
|
|
|
|
return true;
|
|
|
|
|
2023-01-12 19:43:31 +00:00
|
|
|
/*
|
|
|
|
* If the symbol is a static_call trampoline, we can't tell.
|
|
|
|
*/
|
|
|
|
if (func->static_call_tramp)
|
|
|
|
return true;
|
|
|
|
|
2020-06-03 18:09:06 +00:00
|
|
|
/*
|
|
|
|
* The __ubsan_handle_*() calls are like WARN(), they only happen when
|
|
|
|
* something 'BAD' happened. At the risk of taking the machine down,
|
|
|
|
* let them proceed to get the message out.
|
|
|
|
*/
|
|
|
|
if (!strncmp(func->name, "__ubsan_handle_", 15))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:23 +00:00
|
|
|
static int validate_call(struct objtool_file *file,
|
|
|
|
struct instruction *insn,
|
|
|
|
struct insn_state *state)
|
2019-02-25 11:50:09 +00:00
|
|
|
{
|
2020-03-10 17:57:41 +00:00
|
|
|
if (state->noinstr && state->instr <= 0 &&
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
!noinstr_call_dest(file, insn, insn_call_dest(insn))) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "call to %s() leaves .noinstr.text section", call_dest_name(insn));
|
2020-03-10 17:57:41 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
if (state->uaccess && !func_uaccess_safe(insn_call_dest(insn))) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "call to %s() with UACCESS enabled", call_dest_name(insn));
|
2019-02-25 11:50:09 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2019-02-25 10:10:55 +00:00
|
|
|
if (state->df) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "call to %s() with DF set", call_dest_name(insn));
|
2019-02-25 10:10:55 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2019-02-25 11:50:09 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:23 +00:00
|
|
|
static int validate_sibling_call(struct objtool_file *file,
|
|
|
|
struct instruction *insn,
|
|
|
|
struct insn_state *state)
|
2019-03-06 11:58:15 +00:00
|
|
|
{
|
2022-07-11 09:49:50 +00:00
|
|
|
if (insn_func(insn) && has_modified_stack_frame(insn, state)) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "sibling call from callable instruction with modified stack frame");
|
2019-03-06 11:58:15 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-06-24 09:41:23 +00:00
|
|
|
return validate_call(file, insn, state);
|
2019-03-06 11:58:15 +00:00
|
|
|
}
|
|
|
|
|
2020-03-10 17:07:44 +00:00
|
|
|
static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state)
|
|
|
|
{
|
2020-03-10 17:57:41 +00:00
|
|
|
if (state->noinstr && state->instr > 0) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "return with instrumentation enabled");
|
2020-03-10 17:57:41 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2020-03-10 17:07:44 +00:00
|
|
|
if (state->uaccess && !func_uaccess_safe(func)) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "return with UACCESS enabled");
|
2020-03-10 17:07:44 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!state->uaccess && func_uaccess_safe(func)) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "return with UACCESS disabled from a UACCESS-safe function");
|
2020-03-10 17:07:44 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (state->df) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "return with DF set");
|
2020-03-10 17:07:44 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2020-04-01 14:38:19 +00:00
|
|
|
if (func && has_modified_stack_frame(insn, state)) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "return with modified stack frame");
|
2020-03-10 17:07:44 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2020-03-25 13:04:45 +00:00
|
|
|
if (state->cfi.bp_scratch) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "BP used as a scratch register");
|
2020-03-10 17:07:44 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
static struct instruction *next_insn_to_validate(struct objtool_file *file,
|
|
|
|
struct instruction *insn)
|
2020-04-28 17:37:01 +00:00
|
|
|
{
|
2020-12-18 20:19:32 +00:00
|
|
|
struct alt_group *alt_group = insn->alt_group;
|
2020-04-28 17:37:01 +00:00
|
|
|
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
/*
|
|
|
|
* Simulate the fact that alternatives are patched in-place. When the
|
|
|
|
* end of a replacement alt_group is reached, redirect objtool flow to
|
|
|
|
* the end of the original alt_group.
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
*
|
|
|
|
* insn->alts->insn -> alt_group->first_insn
|
|
|
|
* ...
|
|
|
|
* alt_group->last_insn
|
|
|
|
* [alt_group->nop] -> next(orig_group->last_insn)
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
*/
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
if (alt_group) {
|
|
|
|
if (alt_group->nop) {
|
|
|
|
/* ->nop implies ->orig_group */
|
|
|
|
if (insn == alt_group->last_insn)
|
|
|
|
return alt_group->nop;
|
|
|
|
if (insn == alt_group->nop)
|
|
|
|
goto next_orig;
|
|
|
|
}
|
|
|
|
if (insn == alt_group->last_insn && alt_group->orig_group)
|
|
|
|
goto next_orig;
|
|
|
|
}
|
objtool: Support stack layout changes in alternatives
The ORC unwinder showed a warning [1] which revealed the stack layout
didn't match what was expected. The problem was that paravirt patching
had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed
the stack layout between the PUSHF and the POP, so unwinding from an
interrupt which occurred between those two instructions would fail.
Part of the agreed upon solution was to rework the custom paravirt
patching code to use alternatives instead, since objtool already knows
how to read alternatives (and converging runtime patching infrastructure
is always a good thing anyway). But the main problem still remains,
which is that runtime patching can change the stack layout.
Making stack layout changes in alternatives was disallowed with commit
7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt
is going to be doing it, it needs to be supported.
One way to do so would be to modify the ORC table when the code gets
patched. But ORC is simple -- a good thing! -- and it's best to leave
it alone.
Instead, support stack layout changes by "flattening" all possible stack
states (CFI) from parallel alternative code streams into a single set of
linear states. The only necessary limitation is that CFI conflicts are
disallowed at all possible instruction boundaries.
For example, this scenario is allowed:
Alt1 Alt2 Alt3
0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF
0x01 POP %RAX
0x02 NOP
...
0x05 NOP
...
0x07 <insn>
The unwind information for offset-0x00 is identical for all 3
alternatives. Similarly offset-0x05 and higher also are identical (and
the same as 0x00). However offset-0x01 has deviating CFI, but that is
only relevant for Alt3, neither of the other alternative instruction
streams will ever hit that offset.
This scenario is NOT allowed:
Alt1 Alt2
0x00 CALL *pv_ops.save_fl PUSHF
0x01 NOP6
...
0x07 NOP POP %RAX
The problem here is that offset-0x7, which is an instruction boundary in
both possible instruction patch streams, has two conflicting stack
layouts.
[ The above examples were stolen from Peter Zijlstra. ]
The new flattened CFI array is used both for the detection of conflicts
(like the second example above) and the generation of linear ORC
entries.
BTW, another benefit of these changes is that, thanks to some related
cleanups (new fake nops and alt_group struct) objtool can finally be rid
of fake jumps, which were a constant source of headaches.
[1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
2020-12-18 20:26:21 +00:00
|
|
|
|
|
|
|
return next_insn_same_sec(file, insn);
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
|
|
|
|
next_orig:
|
|
|
|
return next_insn_same_sec(file, alt_group->orig_group->last_insn);
|
2020-04-28 17:37:01 +00:00
|
|
|
}
|
|
|
|
|
2025-04-08 08:21:14 +00:00
|
|
|
static bool skip_alt_group(struct instruction *insn)
|
|
|
|
{
|
|
|
|
struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL;
|
|
|
|
|
|
|
|
/* ANNOTATE_IGNORE_ALTERNATIVE */
|
|
|
|
if (insn->alt_group && insn->alt_group->ignore)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For NOP patched with CLAC/STAC, only follow the latter to avoid
|
|
|
|
* impossible code paths combining patched CLAC with unpatched STAC
|
|
|
|
* or vice versa.
|
|
|
|
*
|
|
|
|
* ANNOTATE_IGNORE_ALTERNATIVE could have been used here, but Linus
|
|
|
|
* requested not to do that to avoid hurting .s file readability
|
|
|
|
* around CLAC/STAC alternative sites.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!alt_insn)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Don't override ASM_{CLAC,STAC}_UNSAFE */
|
|
|
|
if (alt_insn->alt_group && alt_insn->alt_group->ignore)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
 * Follow the branch starting at the given instruction, and recursively follow
 * any other branches (jumps). Meanwhile, track the frame pointer state at
 * each instruction and validate all the rules described in
 * tools/objtool/Documentation/objtool.txt.
 *
 * @func may be NULL (non-function code); most warnings are suppressed or
 * relaxed in that case.  @state is passed by value, so every recursive walk
 * (alternatives, branch targets) proceeds with its own copy of the state.
 *
 * Returns 0 on success, non-zero if a warning was emitted.
 */
static int validate_branch(struct objtool_file *file, struct symbol *func,
			   struct instruction *insn, struct insn_state state)
{
	struct alternative *alt;
	struct instruction *next_insn, *prev_insn = NULL;
	struct section *sec;
	u8 visited;
	int ret;

	/* Functions explicitly marked as ignored are not validated. */
	if (func && func->ignore)
		return 0;

	sec = insn->sec;

	while (1) {
		next_insn = next_insn_to_validate(file, insn);

		/*
		 * Straight-line execution crossed into a different function:
		 * that's a fall-through bug unless it's a known-benign case.
		 */
		if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
			/* Ignore KCFI type preambles, which always fall through */
			if (!strncmp(func->name, "__cfi_", 6) ||
			    !strncmp(func->name, "__pfx_", 6))
				return 0;

			if (file->ignore_unreachables)
				return 0;

			WARN("%s() falls through to next function %s()",
			     func->name, insn_func(insn)->name);
			func->warned = 1;

			return 1;
		}

		/*
		 * Visits are tracked separately per uaccess state (bit
		 * shifted by state.uaccess), so an instruction reachable with
		 * uaccess both enabled and disabled gets validated on both
		 * kinds of path.
		 */
		visited = VISITED_BRANCH << state.uaccess;
		if (insn->visited & VISITED_BRANCH_MASK) {
			/*
			 * Revisit: the incoming CFI must agree with what was
			 * recorded the first time, unless a hint overrides it.
			 */
			if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
				return 1;

			/* Already seen in this uaccess state: nothing new. */
			if (insn->visited & visited)
				return 0;
		} else {
			nr_insns_visited++;
		}

		/* Accumulate the per-insn instrumentation-annotation delta. */
		if (state.noinstr)
			state.instr += insn->instr;

		if (insn->hint) {
			if (insn->restore) {
				struct instruction *save_insn, *i;

				/*
				 * Scan backwards for the matching CFI save
				 * hint whose state this restore reuses.
				 */
				i = insn;
				save_insn = NULL;

				sym_for_each_insn_continue_reverse(file, func, i) {
					if (i->save) {
						save_insn = i;
						break;
					}
				}

				if (!save_insn) {
					WARN_INSN(insn, "no corresponding CFI save for CFI restore");
					return 1;
				}

				if (!save_insn->visited) {
					/*
					 * If the restore hint insn is at the
					 * beginning of a basic block and was
					 * branched to from elsewhere, and the
					 * save insn hasn't been visited yet,
					 * defer following this branch for now.
					 * It will be seen later via the
					 * straight-line path.
					 */
					if (!prev_insn)
						return 0;

					WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
					return 1;
				}

				insn->cfi = save_insn->cfi;
				nr_cfi_reused++;
			}

			/* A hint dictates the CFI state at this insn. */
			state.cfi = *insn->cfi;
		} else {
			/* XXX track if we actually changed state.cfi */

			/*
			 * Share the previous insn's CFI object when the state
			 * is unchanged; otherwise intern it in the CFI hash.
			 */
			if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) {
				insn->cfi = prev_insn->cfi;
				nr_cfi_reused++;
			} else {
				insn->cfi = cfi_hash_find_or_add(&state.cfi);
			}
		}

		insn->visited |= visited;

		/* Flatten CFI across the alt group; fails on conflicts. */
		if (propagate_alt_cfi(file, insn))
			return 1;

		/*
		 * Recursively validate each alternative instruction stream
		 * with a copy of the current state.
		 */
		if (insn->alts) {
			for (alt = insn->alts; alt; alt = alt->next) {
				ret = validate_branch(file, func, alt->insn, state);
				if (ret) {
					BT_INSN(insn, "(alt)");
					return ret;
				}
			}
		}

		/*
		 * Stop following the original stream when only the patched
		 * alternative(s) are relevant (see skip_alt_group()).
		 */
		if (skip_alt_group(insn))
			return 0;

		/* Apply this insn's stack operations to the tracked state. */
		if (handle_insn_ops(insn, next_insn, &state))
			return 1;

		switch (insn->type) {

		case INSN_RETURN:
			return validate_return(func, insn, &state);

		case INSN_CALL:
		case INSN_CALL_DYNAMIC:
			ret = validate_call(file, insn, &state);
			if (ret)
				return ret;

			/* Frame-pointer rule: no call without a set-up frame. */
			if (opts.stackval && func && !is_special_call(insn) &&
			    !has_valid_stack_frame(&state)) {
				WARN_INSN(insn, "call without frame pointer save/setup");
				return 1;
			}

			break;

		case INSN_JUMP_CONDITIONAL:
		case INSN_JUMP_UNCONDITIONAL:
			if (is_sibling_call(insn)) {
				ret = validate_sibling_call(file, insn, &state);
				if (ret)
					return ret;

			} else if (insn->jump_dest) {
				/* Follow the taken edge with a state copy. */
				ret = validate_branch(file, func,
						      insn->jump_dest, state);
				if (ret) {
					BT_INSN(insn, "(branch)");
					return ret;
				}
			}

			/* Unconditional jumps never fall through. */
			if (insn->type == INSN_JUMP_UNCONDITIONAL)
				return 0;

			break;

		case INSN_JUMP_DYNAMIC:
		case INSN_JUMP_DYNAMIC_CONDITIONAL:
			if (is_sibling_call(insn)) {
				ret = validate_sibling_call(file, insn, &state);
				if (ret)
					return ret;
			}

			if (insn->type == INSN_JUMP_DYNAMIC)
				return 0;

			break;

		case INSN_SYSCALL:
			/* Only allowed in non-callable code, or when hinted. */
			if (func && (!next_insn || !next_insn->hint)) {
				WARN_INSN(insn, "unsupported instruction in callable function");
				return 1;
			}

			break;

		case INSN_SYSRET:
			if (func && (!next_insn || !next_insn->hint)) {
				WARN_INSN(insn, "unsupported instruction in callable function");
				return 1;
			}

			/* Unlike SYSCALL, execution never continues past here. */
			return 0;

		case INSN_STAC:
			if (!opts.uaccess)
				break;

			if (state.uaccess) {
				WARN_INSN(insn, "recursive UACCESS enable");
				return 1;
			}

			state.uaccess = true;
			break;

		case INSN_CLAC:
			if (!opts.uaccess)
				break;

			if (!state.uaccess && func) {
				WARN_INSN(insn, "redundant UACCESS disable");
				return 1;
			}

			if (func_uaccess_safe(func) && !state.uaccess_stack) {
				WARN_INSN(insn, "UACCESS-safe disables UACCESS");
				return 1;
			}

			state.uaccess = false;
			break;

		case INSN_STD:
			if (state.df) {
				WARN_INSN(insn, "recursive STD");
				return 1;
			}

			state.df = true;
			break;

		case INSN_CLD:
			if (!state.df && func) {
				WARN_INSN(insn, "redundant CLD");
				return 1;
			}

			state.df = false;
			break;

		default:
			break;
		}

		/* Nothing to follow past a dead-end instruction. */
		if (insn->dead_end)
			return 0;

		if (!next_insn) {
			/*
			 * Running off the end of the section is only OK when
			 * the CFI is already undefined (not real code flow).
			 */
			if (state.cfi.cfa.base == CFI_UNDEFINED)
				return 0;
			if (file->ignore_unreachables)
				return 0;

			WARN("%s%sunexpected end of section %s",
			     func ? func->name : "", func ? "(): " : "",
			     sec->name);
			return 1;
		}

		prev_insn = insn;
		insn = next_insn;
	}

	return 0;
}
|
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
static int validate_unwind_hint(struct objtool_file *file,
|
|
|
|
struct instruction *insn,
|
|
|
|
struct insn_state *state)
|
|
|
|
{
|
2025-03-24 21:55:53 +00:00
|
|
|
if (insn->hint && !insn->visited) {
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
int ret = validate_branch(file, insn_func(insn), insn, *state);
|
2023-04-18 21:27:49 +00:00
|
|
|
if (ret)
|
|
|
|
BT_INSN(insn, "<=== (hint)");
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-23 17:26:03 +00:00
|
|
|
static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
|
2017-07-11 15:33:43 +00:00
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
struct insn_state state;
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
int warnings = 0;
|
2017-07-11 15:33:43 +00:00
|
|
|
|
|
|
|
if (!file->hints)
|
|
|
|
return 0;
|
|
|
|
|
2022-04-18 16:50:43 +00:00
|
|
|
init_insn_state(file, &state, sec);
|
2017-07-11 15:33:43 +00:00
|
|
|
|
2020-03-23 17:26:03 +00:00
|
|
|
if (sec) {
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
sec_for_each_insn(file, sec, insn)
|
|
|
|
warnings += validate_unwind_hint(file, insn, &state);
|
2020-03-23 17:26:03 +00:00
|
|
|
} else {
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
for_each_insn(file, insn)
|
|
|
|
warnings += validate_unwind_hint(file, insn, &state);
|
2017-07-11 15:33:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return warnings;
|
|
|
|
}
|
|
|
|
|
2022-06-14 21:16:03 +00:00
|
|
|
/*
 * Validate rethunk entry constraint: must untrain RET before the first RET.
 *
 * Follow every branch (intra-function) and ensure VALIDATE_UNRET_END comes
 * before an actual RET instruction.
 *
 * Returns 0 when every path from @insn is clean, non-zero (after warning)
 * otherwise.
 */
static int validate_unret(struct objtool_file *file, struct instruction *insn)
{
	struct instruction *next, *dest;
	int ret;

	for (;;) {
		next = next_insn_to_validate(file, insn);

		/* Path already verified; also breaks cycles in the CFG walk. */
		if (insn->visited & VISITED_UNRET)
			return 0;

		insn->visited |= VISITED_UNRET;

		/* Each alternative replacement must satisfy the constraint too. */
		if (insn->alts) {
			struct alternative *alt;

			for (alt = insn->alts; alt; alt = alt->next) {
				ret = validate_unret(file, alt->insn);
				if (ret) {
					BT_INSN(insn, "(alt)");
					return ret;
				}
			}
		}

		switch (insn->type) {

		case INSN_CALL_DYNAMIC:
		case INSN_JUMP_DYNAMIC:
		case INSN_JUMP_DYNAMIC_CONDITIONAL:
			/* Indirect targets can't be followed statically. */
			WARN_INSN(insn, "early indirect call");
			return 1;

		case INSN_JUMP_UNCONDITIONAL:
		case INSN_JUMP_CONDITIONAL:
			if (!is_sibling_call(insn)) {
				if (!insn->jump_dest) {
					WARN_INSN(insn, "unresolved jump target after linking?!?");
					return 1;
				}
				/* Recurse into the taken-branch side. */
				ret = validate_unret(file, insn->jump_dest);
				if (ret) {
					BT_INSN(insn, "(branch%s)",
						insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
					return ret;
				}

				/* Unconditional: no fall-through path left. */
				if (insn->type == INSN_JUMP_UNCONDITIONAL)
					return 0;

				break;
			}

			/* Sibling calls are handled like ordinary calls. */
			/* fallthrough */
		case INSN_CALL:
			dest = find_insn(file, insn_call_dest(insn)->sec,
					 insn_call_dest(insn)->offset);
			if (!dest) {
				WARN("Unresolved function after linking!?: %s",
				     insn_call_dest(insn)->name);
				return 1;
			}

			ret = validate_unret(file, dest);
			if (ret) {
				BT_INSN(insn, "(call)");
				return ret;
			}
			/*
			 * If a call returns without error, it must have seen UNTRAIN_RET.
			 * Therefore any non-error return is a success.
			 */
			return 0;

		case INSN_RETURN:
			WARN_INSN(insn, "RET before UNTRAIN");
			return 1;

		case INSN_SYSCALL:
			break;

		case INSN_SYSRET:
			return 0;

		case INSN_NOP:
			/* retpoline_safe NOPs terminate the search on this path. */
			if (insn->retpoline_safe)
				return 0;
			break;

		default:
			break;
		}

		/* Dead ends (e.g. traps) can't reach a RET; path is clean. */
		if (insn->dead_end)
			return 0;

		if (!next) {
			WARN_INSN(insn, "teh end!");
			return 1;
		}
		insn = next;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
|
2023-03-01 15:13:11 +00:00
|
|
|
* Validate that all branches starting at VALIDATE_UNRET_BEGIN encounter
|
|
|
|
* VALIDATE_UNRET_END before RET.
|
2022-06-14 21:16:03 +00:00
|
|
|
*/
|
2023-03-01 15:13:11 +00:00
|
|
|
static int validate_unrets(struct objtool_file *file)
|
2022-06-14 21:16:03 +00:00
|
|
|
{
|
|
|
|
struct instruction *insn;
|
2025-03-24 21:55:59 +00:00
|
|
|
int warnings = 0;
|
2022-06-14 21:16:03 +00:00
|
|
|
|
|
|
|
for_each_insn(file, insn) {
|
2023-03-01 15:13:11 +00:00
|
|
|
if (!insn->unret)
|
2022-06-14 21:16:03 +00:00
|
|
|
continue;
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
warnings += validate_unret(file, insn);
|
2022-06-14 21:16:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return warnings;
|
|
|
|
}
|
|
|
|
|
2018-01-16 09:24:06 +00:00
|
|
|
static int validate_retpoline(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
int warnings = 0;
|
|
|
|
|
|
|
|
for_each_insn(file, insn) {
|
|
|
|
if (insn->type != INSN_JUMP_DYNAMIC &&
|
2022-06-14 21:15:59 +00:00
|
|
|
insn->type != INSN_CALL_DYNAMIC &&
|
|
|
|
insn->type != INSN_RETURN)
|
2018-01-16 09:24:06 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (insn->retpoline_safe)
|
|
|
|
continue;
|
|
|
|
|
2022-09-15 11:11:08 +00:00
|
|
|
if (insn->sec->init)
|
2018-01-31 09:18:28 +00:00
|
|
|
continue;
|
|
|
|
|
2022-06-14 21:15:59 +00:00
|
|
|
if (insn->type == INSN_RETURN) {
|
2022-06-27 22:21:17 +00:00
|
|
|
if (opts.rethunk) {
|
2023-11-21 16:07:37 +00:00
|
|
|
WARN_INSN(insn, "'naked' return found in MITIGATION_RETHUNK build");
|
2025-03-24 21:55:59 +00:00
|
|
|
warnings++;
|
|
|
|
}
|
|
|
|
continue;
|
2022-06-14 21:15:59 +00:00
|
|
|
}
|
2018-01-16 09:24:06 +00:00
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build",
|
|
|
|
insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
|
2018-01-16 09:24:06 +00:00
|
|
|
warnings++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return warnings;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
static bool is_kasan_insn(struct instruction *insn)
|
|
|
|
{
|
|
|
|
return (insn->type == INSN_CALL &&
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
!strcmp(insn_call_dest(insn)->name, "__asan_handle_no_return"));
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool is_ubsan_insn(struct instruction *insn)
|
|
|
|
{
|
|
|
|
return (insn->type == INSN_CALL &&
|
objtool: Union instruction::{call_dest,jump_table}
The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.
struct instruction {
struct list_head list; /* 0 16 */
struct hlist_node hash; /* 16 16 */
struct list_head call_node; /* 32 16 */
struct section * sec; /* 48 8 */
long unsigned int offset; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
long unsigned int immediate; /* 64 8 */
unsigned int len; /* 72 4 */
u8 type; /* 76 1 */
/* Bitfield combined with previous fields */
u16 dead_end:1; /* 76: 8 2 */
u16 ignore:1; /* 76: 9 2 */
u16 ignore_alts:1; /* 76:10 2 */
u16 hint:1; /* 76:11 2 */
u16 save:1; /* 76:12 2 */
u16 restore:1; /* 76:13 2 */
u16 retpoline_safe:1; /* 76:14 2 */
u16 noendbr:1; /* 76:15 2 */
u16 entry:1; /* 78: 0 2 */
u16 visited:4; /* 78: 1 2 */
u16 no_reloc:1; /* 78: 5 2 */
/* XXX 2 bits hole, try to pack */
/* Bitfield combined with next fields */
s8 instr; /* 79 1 */
struct alt_group * alt_group; /* 80 8 */
- struct symbol * call_dest; /* 88 8 */
- struct instruction * jump_dest; /* 96 8 */
- struct instruction * first_jump_src; /* 104 8 */
- struct reloc * jump_table; /* 112 8 */
- struct alternative * alts; /* 120 8 */
+ struct instruction * jump_dest; /* 88 8 */
+ struct instruction * first_jump_src; /* 96 8 */
+ union {
+ struct symbol * _call_dest; /* 104 8 */
+ struct reloc * _jump_table; /* 104 8 */
+ }; /* 104 8 */
+ struct alternative * alts; /* 112 8 */
+ struct symbol * sym; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
- struct symbol * sym; /* 128 8 */
- struct stack_op * stack_ops; /* 136 8 */
- struct cfi_state * cfi; /* 144 8 */
+ struct stack_op * stack_ops; /* 128 8 */
+ struct cfi_state * cfi; /* 136 8 */
- /* size: 152, cachelines: 3, members: 29 */
- /* sum members: 150 */
+ /* size: 144, cachelines: 3, members: 28 */
+ /* sum members: 142 */
/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 24 bytes */
+ /* last cacheline: 16 bytes */
};
pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem
post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
2023-02-08 17:18:02 +00:00
|
|
|
!strcmp(insn_call_dest(insn)->name,
|
2017-06-28 15:11:05 +00:00
|
|
|
"__ubsan_handle_builtin_unreachable"));
|
|
|
|
}
|
|
|
|
|
2020-09-19 06:41:18 +00:00
|
|
|
static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
2025-03-24 21:55:53 +00:00
|
|
|
struct symbol *func = insn_func(insn);
|
2020-09-19 06:41:18 +00:00
|
|
|
struct instruction *prev_insn;
|
2025-03-24 21:55:53 +00:00
|
|
|
int i;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2025-03-24 21:55:54 +00:00
|
|
|
if (insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore))
|
2017-06-28 15:11:07 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
/*
|
2021-11-10 10:01:25 +00:00
|
|
|
* Ignore alternative replacement instructions. This can happen
|
2017-07-27 20:56:54 +00:00
|
|
|
* when a whitelisted function uses one of the ALTERNATIVE macros.
|
2017-06-28 15:11:07 +00:00
|
|
|
*/
|
2021-11-10 10:01:25 +00:00
|
|
|
if (!strcmp(insn->sec->name, ".altinstr_replacement") ||
|
2017-07-27 20:56:54 +00:00
|
|
|
!strcmp(insn->sec->name, ".altinstr_aux"))
|
2017-06-28 15:11:05 +00:00
|
|
|
return true;
|
|
|
|
|
2022-03-08 15:30:46 +00:00
|
|
|
/*
|
2022-04-18 16:50:43 +00:00
|
|
|
* Whole archive runs might encounter dead code from weak symbols.
|
2022-03-08 15:30:46 +00:00
|
|
|
* This is where the linker will have dropped the weak symbol in
|
|
|
|
* favour of a regular symbol, but leaves the code in place.
|
|
|
|
*
|
|
|
|
* In this case we'll find a piece of code (whole function) that is not
|
|
|
|
* covered by a !section symbol. Ignore them.
|
|
|
|
*/
|
2025-03-24 21:55:53 +00:00
|
|
|
if (opts.link && !func) {
|
2022-03-08 15:30:46 +00:00
|
|
|
int size = find_symbol_hole_containing(insn->sec, insn->offset);
|
|
|
|
unsigned long end = insn->offset + size;
|
|
|
|
|
|
|
|
if (!size) /* not a hole */
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (size < 0) /* hole until the end */
|
|
|
|
return true;
|
|
|
|
|
|
|
|
sec_for_each_insn_continue(file, insn) {
|
|
|
|
/*
|
|
|
|
* If we reach a visited instruction at or before the
|
|
|
|
* end of the hole, ignore the unreachable.
|
|
|
|
*/
|
|
|
|
if (insn->visited)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (insn->offset >= end)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this hole jumps to a .cold function, mark it ignore too.
|
|
|
|
*/
|
2022-09-22 20:03:50 +00:00
|
|
|
if (insn->jump_dest && insn_func(insn->jump_dest) &&
|
|
|
|
strstr(insn_func(insn->jump_dest)->name, ".cold")) {
|
2025-03-24 21:55:53 +00:00
|
|
|
insn_func(insn->jump_dest)->ignore = true;
|
2022-03-08 15:30:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2025-03-24 21:55:53 +00:00
|
|
|
if (!func)
|
2020-04-01 18:23:25 +00:00
|
|
|
return false;
|
|
|
|
|
2025-03-24 21:55:53 +00:00
|
|
|
if (func->static_call_tramp)
|
2021-10-30 07:47:58 +00:00
|
|
|
return true;
|
|
|
|
|
2020-04-01 18:23:25 +00:00
|
|
|
/*
|
|
|
|
* CONFIG_UBSAN_TRAP inserts a UD2 when it sees
|
|
|
|
* __builtin_unreachable(). The BUG() macro has an unreachable() after
|
|
|
|
* the UD2, which causes GCC's undefined trap logic to emit another UD2
|
|
|
|
* (or occasionally a JMP to UD2).
|
2020-09-19 06:41:18 +00:00
|
|
|
*
|
|
|
|
* It may also insert a UD2 after calling a __noreturn function.
|
2020-04-01 18:23:25 +00:00
|
|
|
*/
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
prev_insn = prev_insn_same_sec(file, insn);
|
2025-03-28 05:04:21 +00:00
|
|
|
if (prev_insn && prev_insn->dead_end &&
|
2020-04-01 18:23:25 +00:00
|
|
|
(insn->type == INSN_BUG ||
|
|
|
|
(insn->type == INSN_JUMP_UNCONDITIONAL &&
|
|
|
|
insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
|
|
|
|
return true;
|
|
|
|
|
2017-06-28 15:11:05 +00:00
|
|
|
/*
|
|
|
|
* Check if this (or a subsequent) instruction is related to
|
|
|
|
* CONFIG_UBSAN or CONFIG_KASAN.
|
|
|
|
*
|
|
|
|
* End the search at 5 instructions to avoid going into the weeds.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < 5; i++) {
|
|
|
|
|
|
|
|
if (is_kasan_insn(insn) || is_ubsan_insn(insn))
|
|
|
|
return true;
|
|
|
|
|
2018-02-08 23:09:25 +00:00
|
|
|
if (insn->type == INSN_JUMP_UNCONDITIONAL) {
|
|
|
|
if (insn->jump_dest &&
|
2025-03-24 21:55:53 +00:00
|
|
|
insn_func(insn->jump_dest) == func) {
|
2018-02-08 23:09:25 +00:00
|
|
|
insn = insn->jump_dest;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2025-03-24 21:55:53 +00:00
|
|
|
if (insn->offset + insn->len >= func->offset + func->len)
|
2017-06-28 15:11:05 +00:00
|
|
|
break;
|
2018-02-08 23:09:25 +00:00
|
|
|
|
objtool: Remove instruction::list
Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.
struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */
- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */
- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */
- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};
pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
2023-02-08 17:18:05 +00:00
|
|
|
insn = next_insn_same_sec(file, insn);
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-04-12 20:26:13 +00:00
|
|
|
static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
|
2022-10-28 13:50:42 +00:00
|
|
|
{
|
2023-04-12 20:26:13 +00:00
|
|
|
struct instruction *insn, *prev;
|
2023-04-12 20:26:15 +00:00
|
|
|
struct cfi_state *cfi;
|
2022-10-28 13:50:42 +00:00
|
|
|
|
2023-04-12 20:26:13 +00:00
|
|
|
insn = find_insn(file, func->sec, func->offset);
|
|
|
|
if (!insn)
|
|
|
|
return -1;
|
2022-10-28 13:50:42 +00:00
|
|
|
|
2023-04-12 20:26:13 +00:00
|
|
|
for (prev = prev_insn_same_sec(file, insn);
|
|
|
|
prev;
|
|
|
|
prev = prev_insn_same_sec(file, prev)) {
|
|
|
|
u64 offset;
|
2022-10-28 13:50:42 +00:00
|
|
|
|
|
|
|
if (prev->type != INSN_NOP)
|
2023-04-12 20:26:13 +00:00
|
|
|
return -1;
|
2022-10-28 13:50:42 +00:00
|
|
|
|
|
|
|
offset = func->offset - prev->offset;
|
2023-04-12 20:26:13 +00:00
|
|
|
|
|
|
|
if (offset > opts.prefix)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (offset < opts.prefix)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
elf_create_prefix_symbol(file->elf, func, opts.prefix);
|
|
|
|
break;
|
2022-10-28 13:50:42 +00:00
|
|
|
}
|
|
|
|
|
2023-04-12 20:26:13 +00:00
|
|
|
if (!prev)
|
|
|
|
return -1;
|
|
|
|
|
2023-04-12 20:26:15 +00:00
|
|
|
if (!insn->cfi) {
|
|
|
|
/*
|
|
|
|
* This can happen if stack validation isn't enabled or the
|
|
|
|
* function is annotated with STACK_FRAME_NON_STANDARD.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Propagate insn->cfi to the prefix code */
|
|
|
|
cfi = cfi_hash_find_or_add(insn->cfi);
|
|
|
|
for (; prev != insn; prev = next_insn_same_sec(file, prev))
|
|
|
|
prev->cfi = cfi;
|
|
|
|
|
2022-10-28 13:50:42 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-04-12 20:26:13 +00:00
|
|
|
static int add_prefix_symbols(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct section *sec;
|
|
|
|
struct symbol *func;
|
|
|
|
|
|
|
|
for_each_sec(file, sec) {
|
|
|
|
if (!(sec->sh.sh_flags & SHF_EXECINSTR))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
sec_for_each_sym(sec, func) {
|
|
|
|
if (func->type != STT_FUNC)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
add_prefix_symbol(file, func);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-30 07:56:49 +00:00
|
|
|
return 0;
|
2023-04-12 20:26:13 +00:00
|
|
|
}
|
|
|
|
|
2020-03-23 20:17:50 +00:00
|
|
|
static int validate_symbol(struct objtool_file *file, struct section *sec,
|
|
|
|
struct symbol *sym, struct insn_state *state)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
|
|
|
struct instruction *insn;
|
2020-03-23 20:17:50 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sym->len) {
|
|
|
|
WARN("%s() is missing an ELF size annotation", sym->name);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sym->pfunc != sym || sym->alias != sym)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
insn = find_insn(file, sec, sym->offset);
|
2025-03-24 21:55:53 +00:00
|
|
|
if (!insn || insn->visited)
|
2020-03-23 20:17:50 +00:00
|
|
|
return 0;
|
|
|
|
|
2025-03-24 21:55:58 +00:00
|
|
|
if (opts.uaccess)
|
|
|
|
state->uaccess = sym->uaccess_safe;
|
2020-03-23 20:17:50 +00:00
|
|
|
|
2022-09-22 20:03:50 +00:00
|
|
|
ret = validate_branch(file, insn_func(insn), insn, *state);
|
2023-04-18 21:27:49 +00:00
|
|
|
if (ret)
|
|
|
|
BT_INSN(insn, "<=== (sym)");
|
2020-03-23 20:17:50 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int validate_section(struct objtool_file *file, struct section *sec)
|
|
|
|
{
|
2017-06-28 15:11:07 +00:00
|
|
|
struct insn_state state;
|
2020-03-23 20:17:50 +00:00
|
|
|
struct symbol *func;
|
|
|
|
int warnings = 0;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
2023-04-12 19:03:19 +00:00
|
|
|
sec_for_each_sym(sec, func) {
|
2020-03-23 19:57:13 +00:00
|
|
|
if (func->type != STT_FUNC)
|
|
|
|
continue;
|
2019-07-18 01:36:48 +00:00
|
|
|
|
2022-04-18 16:50:43 +00:00
|
|
|
init_insn_state(file, &state, sec);
|
2021-01-21 21:29:24 +00:00
|
|
|
set_func_state(&state.cfi);
|
2020-03-27 15:28:40 +00:00
|
|
|
|
2020-03-23 20:17:50 +00:00
|
|
|
warnings += validate_symbol(file, sec, func, &state);
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return warnings;
|
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:43 +00:00
|
|
|
static int validate_noinstr_sections(struct objtool_file *file)
|
2020-03-10 17:57:41 +00:00
|
|
|
{
|
|
|
|
struct section *sec;
|
2020-03-23 17:26:03 +00:00
|
|
|
int warnings = 0;
|
2020-03-10 17:57:41 +00:00
|
|
|
|
|
|
|
sec = find_section_by_name(file->elf, ".noinstr.text");
|
2020-03-25 16:18:17 +00:00
|
|
|
if (sec) {
|
|
|
|
warnings += validate_section(file, sec);
|
|
|
|
warnings += validate_unwind_hints(file, sec);
|
|
|
|
}
|
2020-03-10 17:57:41 +00:00
|
|
|
|
2020-03-25 16:18:17 +00:00
|
|
|
sec = find_section_by_name(file->elf, ".entry.text");
|
|
|
|
if (sec) {
|
|
|
|
warnings += validate_section(file, sec);
|
|
|
|
warnings += validate_unwind_hints(file, sec);
|
|
|
|
}
|
2020-03-23 17:26:03 +00:00
|
|
|
|
2023-01-12 19:43:31 +00:00
|
|
|
sec = find_section_by_name(file->elf, ".cpuidle.text");
|
|
|
|
if (sec) {
|
|
|
|
warnings += validate_section(file, sec);
|
|
|
|
warnings += validate_unwind_hints(file, sec);
|
|
|
|
}
|
|
|
|
|
2020-03-23 17:26:03 +00:00
|
|
|
return warnings;
|
2020-03-10 17:57:41 +00:00
|
|
|
}
|
|
|
|
|
2020-03-23 19:57:13 +00:00
|
|
|
static int validate_functions(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct section *sec;
|
|
|
|
int warnings = 0;
|
|
|
|
|
2020-03-23 20:11:14 +00:00
|
|
|
for_each_sec(file, sec) {
|
|
|
|
if (!(sec->sh.sh_flags & SHF_EXECINSTR))
|
|
|
|
continue;
|
|
|
|
|
2020-03-23 19:57:13 +00:00
|
|
|
warnings += validate_section(file, sec);
|
2020-03-23 20:11:14 +00:00
|
|
|
}
|
2020-03-23 19:57:13 +00:00
|
|
|
|
|
|
|
return warnings;
|
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
static void mark_endbr_used(struct instruction *insn)
|
2022-03-08 15:30:54 +00:00
|
|
|
{
|
2022-04-18 16:50:34 +00:00
|
|
|
if (!list_empty(&insn->call_node))
|
|
|
|
list_del_init(&insn->call_node);
|
|
|
|
}
|
|
|
|
|
2022-09-15 11:11:13 +00:00
|
|
|
static bool noendbr_range(struct objtool_file *file, struct instruction *insn)
|
|
|
|
{
|
|
|
|
struct symbol *sym = find_symbol_containing(insn->sec, insn->offset-1);
|
|
|
|
struct instruction *first;
|
|
|
|
|
|
|
|
if (!sym)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
first = find_insn(file, sym->sec, sym->offset);
|
|
|
|
if (!first)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (first->type != INSN_ENDBR && !first->noendbr)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return insn->offset == sym->offset + sym->len;
|
|
|
|
}
|
|
|
|
|
2024-10-04 00:31:10 +00:00
|
|
|
static int __validate_ibt_insn(struct objtool_file *file, struct instruction *insn,
|
|
|
|
struct instruction *dest)
|
|
|
|
{
|
|
|
|
if (dest->type == INSN_ENDBR) {
|
|
|
|
mark_endbr_used(dest);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (insn_func(dest) && insn_func(insn) &&
|
|
|
|
insn_func(dest)->pfunc == insn_func(insn)->pfunc) {
|
|
|
|
/*
|
|
|
|
* Anything from->to self is either _THIS_IP_ or
|
|
|
|
* IRET-to-self.
|
|
|
|
*
|
|
|
|
* There is no sane way to annotate _THIS_IP_ since the
|
|
|
|
* compiler treats the relocation as a constant and is
|
|
|
|
* happy to fold in offsets, skewing any annotation we
|
|
|
|
* do, leading to vast amounts of false-positives.
|
|
|
|
*
|
|
|
|
* There's also compiler generated _THIS_IP_ through
|
|
|
|
* KCOV and such which we have no hope of annotating.
|
|
|
|
*
|
|
|
|
* As such, blanket accept self-references without
|
|
|
|
* issue.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Accept anything ANNOTATE_NOENDBR.
|
|
|
|
*/
|
|
|
|
if (dest->noendbr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Accept if this is the instruction after a symbol
|
|
|
|
* that is (no)endbr -- typical code-range usage.
|
|
|
|
*/
|
|
|
|
if (noendbr_range(file, dest))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
WARN_INSN(insn, "relocation to !ENDBR: %s", offstr(dest->sec, dest->offset));
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
|
|
|
|
{
|
|
|
|
struct instruction *dest;
|
2022-03-08 15:30:54 +00:00
|
|
|
struct reloc *reloc;
|
2022-04-18 16:50:34 +00:00
|
|
|
unsigned long off;
|
|
|
|
int warnings = 0;
|
2022-03-08 15:30:54 +00:00
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
/*
|
|
|
|
* Looking for function pointer load relocations. Ignore
|
|
|
|
* direct/indirect branches:
|
|
|
|
*/
|
|
|
|
switch (insn->type) {
|
2024-10-04 00:31:10 +00:00
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
case INSN_CALL:
|
|
|
|
case INSN_CALL_DYNAMIC:
|
|
|
|
case INSN_JUMP_CONDITIONAL:
|
|
|
|
case INSN_JUMP_UNCONDITIONAL:
|
|
|
|
case INSN_JUMP_DYNAMIC:
|
|
|
|
case INSN_JUMP_DYNAMIC_CONDITIONAL:
|
|
|
|
case INSN_RETURN:
|
|
|
|
case INSN_NOP:
|
|
|
|
return 0;
|
2024-10-04 00:31:10 +00:00
|
|
|
|
|
|
|
case INSN_LEA_RIP:
|
|
|
|
if (!insn_reloc(file, insn)) {
|
|
|
|
/* local function pointer reference without reloc */
|
|
|
|
|
|
|
|
off = arch_jump_destination(insn);
|
|
|
|
|
|
|
|
dest = find_insn(file, insn->sec, off);
|
|
|
|
if (!dest) {
|
|
|
|
WARN_INSN(insn, "corrupt function pointer reference");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return __validate_ibt_insn(file, insn, dest);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2022-03-08 15:30:54 +00:00
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
for (reloc = insn_reloc(file, insn);
|
|
|
|
reloc;
|
|
|
|
reloc = find_reloc_by_dest_range(file->elf, insn->sec,
|
2023-05-30 17:21:06 +00:00
|
|
|
reloc_offset(reloc) + 1,
|
|
|
|
(insn->offset + insn->len) - (reloc_offset(reloc) + 1))) {
|
2022-03-08 15:30:54 +00:00
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
off = reloc->sym->offset;
|
2023-05-30 17:21:07 +00:00
|
|
|
if (reloc_type(reloc) == R_X86_64_PC32 ||
|
|
|
|
reloc_type(reloc) == R_X86_64_PLT32)
|
2023-05-30 17:21:08 +00:00
|
|
|
off += arch_dest_reloc_offset(reloc_addend(reloc));
|
2022-04-18 16:50:34 +00:00
|
|
|
else
|
2023-05-30 17:21:08 +00:00
|
|
|
off += reloc_addend(reloc);
|
2022-04-18 16:50:34 +00:00
|
|
|
|
|
|
|
dest = find_insn(file, reloc->sym->sec, off);
|
|
|
|
if (!dest)
|
2022-03-08 15:30:54 +00:00
|
|
|
continue;
|
|
|
|
|
2024-10-04 00:31:10 +00:00
|
|
|
warnings += __validate_ibt_insn(file, insn, dest);
|
2022-04-18 16:50:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return warnings;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int validate_ibt_data_reloc(struct objtool_file *file,
|
|
|
|
struct reloc *reloc)
|
|
|
|
{
|
|
|
|
struct instruction *dest;
|
|
|
|
|
|
|
|
dest = find_insn(file, reloc->sym->sec,
|
2023-05-30 17:21:08 +00:00
|
|
|
reloc->sym->offset + reloc_addend(reloc));
|
2022-04-18 16:50:34 +00:00
|
|
|
if (!dest)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (dest->type == INSN_ENDBR) {
|
|
|
|
mark_endbr_used(dest);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dest->noendbr)
|
|
|
|
return 0;
|
|
|
|
|
2025-04-01 04:26:41 +00:00
|
|
|
WARN_FUNC(reloc->sec->base, reloc_offset(reloc),
|
|
|
|
"data relocation to !ENDBR: %s", offstr(dest->sec, dest->offset));
|
2022-04-18 16:50:34 +00:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Validate IBT rules and remove used ENDBR instructions from the seal list.
|
|
|
|
* Unused ENDBR instructions will be annotated for sealing (i.e., replaced with
|
|
|
|
* NOPs) later, in create_ibt_endbr_seal_sections().
|
|
|
|
*/
|
|
|
|
static int validate_ibt(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct section *sec;
|
|
|
|
struct reloc *reloc;
|
|
|
|
struct instruction *insn;
|
|
|
|
int warnings = 0;
|
|
|
|
|
|
|
|
for_each_insn(file, insn)
|
|
|
|
warnings += validate_ibt_insn(file, insn);
|
|
|
|
|
|
|
|
for_each_sec(file, sec) {
|
|
|
|
|
|
|
|
/* Already done by validate_ibt_insn() */
|
|
|
|
if (sec->sh.sh_flags & SHF_EXECINSTR)
|
2022-03-08 15:30:54 +00:00
|
|
|
continue;
|
|
|
|
|
2023-05-30 17:20:55 +00:00
|
|
|
if (!sec->rsec)
|
2022-04-18 16:50:34 +00:00
|
|
|
continue;
|
2022-03-08 15:30:54 +00:00
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
/*
|
|
|
|
* These sections can reference text addresses, but not with
|
|
|
|
* the intent to indirect branch to them.
|
|
|
|
*/
|
2022-08-18 21:39:27 +00:00
|
|
|
if ((!strncmp(sec->name, ".discard", 8) &&
|
|
|
|
strcmp(sec->name, ".discard.ibt_endbr_noseal")) ||
|
2022-04-18 16:50:34 +00:00
|
|
|
!strncmp(sec->name, ".debug", 6) ||
|
|
|
|
!strcmp(sec->name, ".altinstructions") ||
|
|
|
|
!strcmp(sec->name, ".ibt_endbr_seal") ||
|
|
|
|
!strcmp(sec->name, ".orc_unwind_ip") ||
|
|
|
|
!strcmp(sec->name, ".parainstructions") ||
|
|
|
|
!strcmp(sec->name, ".retpoline_sites") ||
|
|
|
|
!strcmp(sec->name, ".smp_locks") ||
|
|
|
|
!strcmp(sec->name, ".static_call_sites") ||
|
|
|
|
!strcmp(sec->name, "_error_injection_whitelist") ||
|
|
|
|
!strcmp(sec->name, "_kprobe_blacklist") ||
|
|
|
|
!strcmp(sec->name, "__bug_table") ||
|
|
|
|
!strcmp(sec->name, "__ex_table") ||
|
|
|
|
!strcmp(sec->name, "__jump_table") ||
|
2022-09-08 21:54:59 +00:00
|
|
|
!strcmp(sec->name, "__mcount_loc") ||
|
Networking changes for 6.1.
Core
----
- Introduce and use a single page frag cache for allocating small skb
heads, clawing back the 10-20% performance regression in UDP flood
test from previous fixes.
- Run packets which already went thru HW coalescing thru SW GRO.
This significantly improves TCP segment coalescing and simplifies
deployments as different workloads benefit from HW or SW GRO.
- Shrink the size of the base zero-copy send structure.
- Move TCP init under a new slow / sleepable version of DO_ONCE().
BPF
---
- Add BPF-specific, any-context-safe memory allocator.
- Add helpers/kfuncs for PKCS#7 signature verification from BPF
programs.
- Define a new map type and related helpers for user space -> kernel
communication over a ring buffer (BPF_MAP_TYPE_USER_RINGBUF).
- Allow targeting BPF iterators to loop through resources of one
task/thread.
- Add ability to call selected destructive functions.
Expose crash_kexec() to allow BPF to trigger a kernel dump.
Use CAP_SYS_BOOT check on the loading process to judge permissions.
- Enable BPF to collect custom hierarchical cgroup stats efficiently
by integrating with the rstat framework.
- Support struct arguments for trampoline based programs.
Only structs with size <= 16B and x86 are supported.
- Invoke cgroup/connect{4,6} programs for unprivileged ICMP ping
sockets (instead of just TCP and UDP sockets).
- Add a helper for accessing CLOCK_TAI for time sensitive network
related programs.
- Support accessing network tunnel metadata's flags.
- Make TCP SYN ACK RTO tunable by BPF programs with TCP Fast Open.
- Add support for writing to Netfilter's nf_conn:mark.
Protocols
---------
- WiFi: more Extremely High Throughput (EHT) and Multi-Link
Operation (MLO) work (802.11be, WiFi 7).
- vsock: improve support for SO_RCVLOWAT.
- SMC: support SO_REUSEPORT.
- Netlink: define and document how to use netlink in a "modern" way.
Support reporting missing attributes via extended ACK.
- IPSec: support collect metadata mode for xfrm interfaces.
- TCPv6: send consistent autoflowlabel in SYN_RECV state
and RST packets.
- TCP: introduce optional per-netns connection hash table to allow
better isolation between namespaces (opt-in, at the cost of memory
and cache pressure).
- MPTCP: support TCP_FASTOPEN_CONNECT.
- Add NEXT-C-SID support in Segment Routing (SRv6) End behavior.
- Adjust IP_UNICAST_IF sockopt behavior for connected UDP sockets.
- Open vSwitch:
- Allow specifying ifindex of new interfaces.
- Allow conntrack and metering in non-initial user namespace.
- TLS: support the Korean ARIA-GCM crypto algorithm.
- Remove DECnet support.
Driver API
----------
- Allow selecting the conduit interface used by each port
in DSA switches, at runtime.
- Ethernet Power Sourcing Equipment and Power Device support.
- Add tc-taprio support for queueMaxSDU parameter, i.e. setting
per traffic class max frame size for time-based packet schedules.
- Support PHY rate matching - adapting between differing host-side
and link-side speeds.
- Introduce QUSGMII PHY mode and 1000BASE-KX interface mode.
- Validate OF (device tree) nodes for DSA shared ports; make
phylink-related properties mandatory on DSA and CPU ports.
Enforcing more uniformity should allow transitioning to phylink.
- Require that flash component name used during update matches one
of the components for which version is reported by info_get().
- Remove "weight" argument from driver-facing NAPI API as much
as possible. It's one of those magic knobs which seemed like
a good idea at the time but is too indirect to use in practice.
- Support offload of TLS connections with 256 bit keys.
New hardware / drivers
----------------------
- Ethernet:
- Microchip KSZ9896 6-port Gigabit Ethernet Switch
- Renesas Ethernet AVB (EtherAVB-IF) Gen4 SoCs
- Analog Devices ADIN1110 and ADIN2111 industrial single pair
Ethernet (10BASE-T1L) MAC+PHY.
- Rockchip RV1126 Gigabit Ethernet (a version of stmmac IP).
- Ethernet SFPs / modules:
- RollBall / Hilink / Turris 10G copper SFPs
- HALNy GPON module
- WiFi:
- CYW43439 SDIO chipset (brcmfmac)
- CYW89459 PCIe chipset (brcmfmac)
- BCM4378 on Apple platforms (brcmfmac)
Drivers
-------
- CAN:
- gs_usb: HW timestamp support
- Ethernet PHYs:
- lan8814: cable diagnostics
- Ethernet NICs:
- Intel (100G):
- implement control of FCS/CRC stripping
- port splitting via devlink
- L2TPv3 filtering offload
- nVidia/Mellanox:
- tunnel offload for sub-functions
- MACSec offload, w/ Extended packet number and replay
window offload
- significantly restructure, and optimize the AF_XDP support,
align the behavior with other vendors
- Huawei:
- configuring DSCP map for traffic class selection
- querying standard FEC statistics
- querying SerDes lane number via ethtool
- Marvell/Cavium:
- egress priority flow control
- MACSec offload
- AMD/SolarFlare:
- PTP over IPv6 and raw Ethernet
- small / embedded:
- ax88772: convert to phylink (to support SFP cages)
- altera: tse: convert to phylink
- ftgmac100: support fixed link
- enetc: standard Ethtool counters
- macb: ZynqMP SGMII dynamic configuration support
- tsnep: support multi-queue and use page pool
- lan743x: Rx IP & TCP checksum offload
- igc: add xdp frags support to ndo_xdp_xmit
- Ethernet high-speed switches:
- Marvell (prestera):
- support SPAN port features (traffic mirroring)
- nexthop object offloading
- Microchip (sparx5):
- multicast forwarding offload
- QoS queuing offload (tc-mqprio, tc-tbf, tc-ets)
- Ethernet embedded switches:
- Marvell (mv88e6xxx):
- support RGMII cmode
- NXP (felix):
- standardized ethtool counters
- Microchip (lan966x):
- QoS queuing offload (tc-mqprio, tc-tbf, tc-cbs, tc-ets)
- traffic policing and mirroring
- link aggregation / bonding offload
- QUSGMII PHY mode support
- Qualcomm 802.11ax WiFi (ath11k):
- cold boot calibration support on WCN6750
- support to connect to a non-transmit MBSSID AP profile
- enable remain-on-channel support on WCN6750
- Wake-on-WLAN support for WCN6750
- support to provide transmit power from firmware via nl80211
- support to get power save duration for each client
- spectral scan support for 160 MHz
- MediaTek WiFi (mt76):
- WiFi-to-Ethernet bridging offload for MT7986 chips
- RealTek WiFi (rtw89):
- P2P support
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEE6jPA+I1ugmIBA4hXMUZtbf5SIrsFAmM7vtkACgkQMUZtbf5S
Irvotg//dmh53rC+UMKO3OgOqPlSMnaqzbUdDEfN6mj4Mpox7Csb8zERVURHhBHY
fvlXWsDgxmvgTebI5fvNC5+f1iW5xcqgJV2TWnNmDOKWwvQwb6qQfgixVmunvkpe
IIukMXYt0dAf9bXeeEfbNXcCb85cPwB76stX0tMV6BX7osp3T0TL1fvFk0NJkL0j
TeydLad/yAQtPb4TbeWYjNDoxPVDf0cVpUrevLGmWE88UMYmgTqPze+h1W5Wri52
bzjdLklY/4cgcIZClHQ6F9CeRWqEBxvujA5Hj/cwOcn/ptVVJWUGi7sQo3sYkoSs
HFu+F8XsTec14kGNC0Ab40eVdqs5l/w8+E+4jvgXeKGOtVns8DwoiUIzqXpyty89
Ib04mffrwWNjFtHvo/kIsNwP05X2PGE9HUHfwsTUfisl/ASvMmQp7D7vUoqQC/4B
AMVzT5qpjkmfBHYQQGuw8FxJhMeAOjC6aAo6censhXJyiUhIfleQsN0syHdaNb8q
9RZlhAgQoVb6ZgvBV8r8unQh/WtNZ3AopwifwVJld2unsE/UNfQy2KyqOWBES/zf
LP9sfuX0JnmHn8s1BQEUMPU1jF9ZVZCft7nufJDL6JhlAL+bwZeEN4yCiAHOPZqE
ymSLHI9s8yWZoNpuMWKrI9kFexVnQFKmA3+quAJUcYHNMSsLkL8=
=Gsio
-----END PGP SIGNATURE-----
Merge tag 'net-next-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- Introduce and use a single page frag cache for allocating small skb
heads, clawing back the 10-20% performance regression in UDP flood
test from previous fixes.
- Run packets which already went thru HW coalescing thru SW GRO. This
significantly improves TCP segment coalescing and simplifies
deployments as different workloads benefit from HW or SW GRO.
- Shrink the size of the base zero-copy send structure.
- Move TCP init under a new slow / sleepable version of DO_ONCE().
BPF:
- Add BPF-specific, any-context-safe memory allocator.
- Add helpers/kfuncs for PKCS#7 signature verification from BPF
programs.
- Define a new map type and related helpers for user space -> kernel
communication over a ring buffer (BPF_MAP_TYPE_USER_RINGBUF).
- Allow targeting BPF iterators to loop through resources of one
task/thread.
- Add ability to call selected destructive functions. Expose
crash_kexec() to allow BPF to trigger a kernel dump. Use
CAP_SYS_BOOT check on the loading process to judge permissions.
- Enable BPF to collect custom hierarchical cgroup stats efficiently
by integrating with the rstat framework.
- Support struct arguments for trampoline based programs. Only
structs with size <= 16B and x86 are supported.
- Invoke cgroup/connect{4,6} programs for unprivileged ICMP ping
sockets (instead of just TCP and UDP sockets).
- Add a helper for accessing CLOCK_TAI for time sensitive network
related programs.
- Support accessing network tunnel metadata's flags.
- Make TCP SYN ACK RTO tunable by BPF programs with TCP Fast Open.
- Add support for writing to Netfilter's nf_conn:mark.
Protocols:
- WiFi: more Extremely High Throughput (EHT) and Multi-Link Operation
(MLO) work (802.11be, WiFi 7).
- vsock: improve support for SO_RCVLOWAT.
- SMC: support SO_REUSEPORT.
- Netlink: define and document how to use netlink in a "modern" way.
Support reporting missing attributes via extended ACK.
- IPSec: support collect metadata mode for xfrm interfaces.
- TCPv6: send consistent autoflowlabel in SYN_RECV state and RST
packets.
- TCP: introduce optional per-netns connection hash table to allow
better isolation between namespaces (opt-in, at the cost of memory
and cache pressure).
- MPTCP: support TCP_FASTOPEN_CONNECT.
- Add NEXT-C-SID support in Segment Routing (SRv6) End behavior.
- Adjust IP_UNICAST_IF sockopt behavior for connected UDP sockets.
- Open vSwitch:
- Allow specifying ifindex of new interfaces.
- Allow conntrack and metering in non-initial user namespace.
- TLS: support the Korean ARIA-GCM crypto algorithm.
- Remove DECnet support.
Driver API:
- Allow selecting the conduit interface used by each port in DSA
switches, at runtime.
- Ethernet Power Sourcing Equipment and Power Device support.
- Add tc-taprio support for queueMaxSDU parameter, i.e. setting per
traffic class max frame size for time-based packet schedules.
- Support PHY rate matching - adapting between differing host-side
and link-side speeds.
- Introduce QUSGMII PHY mode and 1000BASE-KX interface mode.
- Validate OF (device tree) nodes for DSA shared ports; make
phylink-related properties mandatory on DSA and CPU ports.
Enforcing more uniformity should allow transitioning to phylink.
- Require that flash component name used during update matches one of
the components for which version is reported by info_get().
- Remove "weight" argument from driver-facing NAPI API as much as
possible. It's one of those magic knobs which seemed like a good
idea at the time but is too indirect to use in practice.
- Support offload of TLS connections with 256 bit keys.
New hardware / drivers:
- Ethernet:
- Microchip KSZ9896 6-port Gigabit Ethernet Switch
- Renesas Ethernet AVB (EtherAVB-IF) Gen4 SoCs
- Analog Devices ADIN1110 and ADIN2111 industrial single pair
Ethernet (10BASE-T1L) MAC+PHY.
- Rockchip RV1126 Gigabit Ethernet (a version of stmmac IP).
- Ethernet SFPs / modules:
- RollBall / Hilink / Turris 10G copper SFPs
- HALNy GPON module
- WiFi:
- CYW43439 SDIO chipset (brcmfmac)
- CYW89459 PCIe chipset (brcmfmac)
- BCM4378 on Apple platforms (brcmfmac)
Drivers:
- CAN:
- gs_usb: HW timestamp support
- Ethernet PHYs:
- lan8814: cable diagnostics
- Ethernet NICs:
- Intel (100G):
- implement control of FCS/CRC stripping
- port splitting via devlink
- L2TPv3 filtering offload
- nVidia/Mellanox:
- tunnel offload for sub-functions
- MACSec offload, w/ Extended packet number and replay window
offload
- significantly restructure, and optimize the AF_XDP support,
align the behavior with other vendors
- Huawei:
- configuring DSCP map for traffic class selection
- querying standard FEC statistics
- querying SerDes lane number via ethtool
- Marvell/Cavium:
- egress priority flow control
- MACSec offload
- AMD/SolarFlare:
- PTP over IPv6 and raw Ethernet
- small / embedded:
- ax88772: convert to phylink (to support SFP cages)
- altera: tse: convert to phylink
- ftgmac100: support fixed link
- enetc: standard Ethtool counters
- macb: ZynqMP SGMII dynamic configuration support
- tsnep: support multi-queue and use page pool
- lan743x: Rx IP & TCP checksum offload
- igc: add xdp frags support to ndo_xdp_xmit
- Ethernet high-speed switches:
- Marvell (prestera):
- support SPAN port features (traffic mirroring)
- nexthop object offloading
- Microchip (sparx5):
- multicast forwarding offload
- QoS queuing offload (tc-mqprio, tc-tbf, tc-ets)
- Ethernet embedded switches:
- Marvell (mv88e6xxx):
- support RGMII cmode
- NXP (felix):
- standardized ethtool counters
- Microchip (lan966x):
- QoS queuing offload (tc-mqprio, tc-tbf, tc-cbs, tc-ets)
- traffic policing and mirroring
- link aggregation / bonding offload
- QUSGMII PHY mode support
- Qualcomm 802.11ax WiFi (ath11k):
- cold boot calibration support on WCN6750
- support to connect to a non-transmit MBSSID AP profile
- enable remain-on-channel support on WCN6750
- Wake-on-WLAN support for WCN6750
- support to provide transmit power from firmware via nl80211
- support to get power save duration for each client
- spectral scan support for 160 MHz
- MediaTek WiFi (mt76):
- WiFi-to-Ethernet bridging offload for MT7986 chips
- RealTek WiFi (rtw89):
- P2P support"
* tag 'net-next-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1864 commits)
eth: pse: add missing static inlines
once: rename _SLOW to _SLEEPABLE
net: pse-pd: add regulator based PSE driver
dt-bindings: net: pse-dt: add bindings for regulator based PoDL PSE controller
ethtool: add interface to interact with Ethernet Power Equipment
net: mdiobus: search for PSE nodes by parsing PHY nodes.
net: mdiobus: fwnode_mdiobus_register_phy() rework error handling
net: add framework to support Ethernet PSE and PDs devices
dt-bindings: net: phy: add PoDL PSE property
net: marvell: prestera: Propagate nh state from hw to kernel
net: marvell: prestera: Add neighbour cache accounting
net: marvell: prestera: add stub handler neighbour events
net: marvell: prestera: Add heplers to interact with fib_notifier_info
net: marvell: prestera: Add length macros for prestera_ip_addr
net: marvell: prestera: add delayed wq and flush wq on deinit
net: marvell: prestera: Add strict cleanup of fib arbiter
net: marvell: prestera: Add cleanup of allocated fib_nodes
net: marvell: prestera: Add router nexthops ABI
eth: octeon: fix build after netif_napi_add() changes
net/mlx5: E-Switch, Return EBUSY if can't get mode lock
...
2022-10-04 20:38:03 +00:00
|
|
|
!strcmp(sec->name, ".kcfi_traps") ||
|
kbuild: Add AutoFDO support for Clang build
Add the build support for using Clang's AutoFDO. Building the kernel
with AutoFDO does not reduce the optimization level from the
compiler. AutoFDO uses hardware sampling to gather information about
the frequency of execution of different code paths within a binary.
This information is then used to guide the compiler's optimization
decisions, resulting in a more efficient binary. Experiments
showed that the kernel can improve up to 10% in latency.
The support requires a Clang compiler after LLVM 17. This submission
is limited to x86 platforms that support PMU features like LBR on
Intel machines and AMD Zen3 BRS. Support for SPE on ARM 1,
and BRBE on ARM 1 is part of planned future work.
Here is an example workflow for AutoFDO kernel:
1) Build the kernel on the host machine with LLVM enabled, for example,
$ make menuconfig LLVM=1
Turn on AutoFDO build config:
CONFIG_AUTOFDO_CLANG=y
With a configuration that has LLVM enabled, use the following
command:
scripts/config -e AUTOFDO_CLANG
After getting the config, build with
$ make LLVM=1
2) Install the kernel on the test machine.
3) Run the load tests. The '-c' option in perf specifies the sample
event period. We suggest using a suitable prime number,
like 500009, for this purpose.
For Intel platforms:
$ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> \
-o <perf_file> -- <loadtest>
For AMD platforms:
The supported system are: Zen3 with BRS, or Zen4 with amd_lbr_v2
For Zen3:
$ cat proc/cpuinfo | grep " brs"
For Zen4:
$ cat proc/cpuinfo | grep amd_lbr_v2
$ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a \
-N -b -c <count> -o <perf_file> -- <loadtest>
4) (Optional) Download the raw perf file to the host machine.
5) To generate an AutoFDO profile, two offline tools are available:
create_llvm_prof and llvm_profgen. The create_llvm_prof tool is part
of the AutoFDO project and can be found on GitHub
(https://github.com/google/autofdo), version v0.30.1 or later. The
llvm_profgen tool is included in the LLVM compiler itself. It's
important to note that the version of llvm_profgen doesn't need to
match the version of Clang. It needs to be the LLVM 19 release or
later, or from the LLVM trunk.
$ llvm-profgen --kernel --binary=<vmlinux> --perfdata=<perf_file> \
-o <profile_file>
or
$ create_llvm_prof --binary=<vmlinux> --profile=<perf_file> \
--format=extbinary --out=<profile_file>
Note that multiple AutoFDO profile files can be merged into one via:
$ llvm-profdata merge -o <profile_file> <profile_1> ... <profile_n>
6) Rebuild the kernel using the AutoFDO profile file with the same config
as step 1, (Note CONFIG_AUTOFDO_CLANG needs to be enabled):
$ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file>
Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
Suggested-by: Krzysztof Pszeniczny <kpszeniczny@google.com>
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Stephane Eranian <eranian@google.com>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Yabin Cui <yabinc@google.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <kees@kernel.org>
Tested-by: Peter Jung <ptr1337@cachyos.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
2024-11-02 17:51:08 +00:00
|
|
|
!strcmp(sec->name, ".llvm.call-graph-profile") ||
|
kbuild: Add Propeller configuration for kernel build
Add the build support for using Clang's Propeller optimizer. Like
AutoFDO, Propeller uses hardware sampling to gather information
about the frequency of execution of different code paths within a
binary. This information is then used to guide the compiler's
optimization decisions, resulting in a more efficient binary.
The support requires a Clang compiler LLVM 19 or later, and the
create_llvm_prof tool
(https://github.com/google/autofdo/releases/tag/v0.30.1). This
commit is limited to x86 platforms that support PMU features
like LBR on Intel machines and AMD Zen3 BRS.
Here is an example workflow for building an AutoFDO+Propeller
optimized kernel:
1) Build the kernel on the host machine, with AutoFDO and Propeller
build config
CONFIG_AUTOFDO_CLANG=y
CONFIG_PROPELLER_CLANG=y
then
$ make LLVM=1 CLANG_AUTOFDO_PROFILE=<autofdo_profile>
“<autofdo_profile>” is the profile collected when doing a non-Propeller
AutoFDO build. This step builds a kernel that has the same optimization
level as AutoFDO, plus a metadata section that records basic block
information. This kernel image runs as fast as an AutoFDO optimized
kernel.
2) Install the kernel on test/production machines.
3) Run the load tests. The '-c' option in perf specifies the sample
event period. We suggest using a suitable prime number,
like 500009, for this purpose.
For Intel platforms:
$ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> \
-o <perf_file> -- <loadtest>
For AMD platforms:
The supported system are: Zen3 with BRS, or Zen4 with amd_lbr_v2
# To see if Zen3 support LBR:
$ cat proc/cpuinfo | grep " brs"
# To see if Zen4 support LBR:
$ cat proc/cpuinfo | grep amd_lbr_v2
# If the result is yes, then collect the profile using:
$ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a \
-N -b -c <count> -o <perf_file> -- <loadtest>
4) (Optional) Download the raw perf file to the host machine.
5) Generate Propeller profile:
$ create_llvm_prof --binary=<vmlinux> --profile=<perf_file> \
--format=propeller --propeller_output_module_name \
--out=<propeller_profile_prefix>_cc_profile.txt \
--propeller_symorder=<propeller_profile_prefix>_ld_profile.txt
“create_llvm_prof” is the profile conversion tool, and a prebuilt
binary for linux can be found on
https://github.com/google/autofdo/releases/tag/v0.30.1 (can also build
from source).
"<propeller_profile_prefix>" can be something like
"/home/user/dir/any_string".
This command generates a pair of Propeller profiles:
"<propeller_profile_prefix>_cc_profile.txt" and
"<propeller_profile_prefix>_ld_profile.txt".
6) Rebuild the kernel using the AutoFDO and Propeller profile files.
CONFIG_AUTOFDO_CLANG=y
CONFIG_PROPELLER_CLANG=y
and
$ make LLVM=1 CLANG_AUTOFDO_PROFILE=<autofdo_profile> \
CLANG_PROPELLER_PROFILE_PREFIX=<propeller_profile_prefix>
Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
Suggested-by: Krzysztof Pszeniczny <kpszeniczny@google.com>
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Stephane Eranian <eranian@google.com>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
2024-11-02 17:51:14 +00:00
|
|
|
!strcmp(sec->name, ".llvm_bb_addr_map") ||
|
2024-11-08 09:32:02 +00:00
|
|
|
!strcmp(sec->name, "__tracepoints") ||
|
2022-09-03 13:11:53 +00:00
|
|
|
strstr(sec->name, "__patchable_function_entries"))
|
2022-04-18 16:50:34 +00:00
|
|
|
continue;
|
2022-03-08 15:30:54 +00:00
|
|
|
|
2023-05-30 17:21:02 +00:00
|
|
|
for_each_reloc(sec->rsec, reloc)
|
2022-04-18 16:50:34 +00:00
|
|
|
warnings += validate_ibt_data_reloc(file, reloc);
|
2022-03-08 15:30:54 +00:00
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:34 +00:00
|
|
|
return warnings;
|
2022-03-08 15:30:54 +00:00
|
|
|
}
|
|
|
|
|
2022-04-18 16:50:35 +00:00
|
|
|
static int validate_sls(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct instruction *insn, *next_insn;
|
|
|
|
int warnings = 0;
|
|
|
|
|
|
|
|
for_each_insn(file, insn) {
|
|
|
|
next_insn = next_insn_same_sec(file, insn);
|
|
|
|
|
|
|
|
if (insn->retpoline_safe)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
switch (insn->type) {
|
|
|
|
case INSN_RETURN:
|
|
|
|
if (!next_insn || next_insn->type != INSN_TRAP) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "missing int3 after ret");
|
2022-04-18 16:50:35 +00:00
|
|
|
warnings++;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
case INSN_JUMP_DYNAMIC:
|
|
|
|
if (!next_insn || next_insn->type != INSN_TRAP) {
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "missing int3 after indirect jump");
|
2022-04-18 16:50:35 +00:00
|
|
|
warnings++;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return warnings;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:11:07 +00:00
|
|
|
static int validate_reachable_instructions(struct objtool_file *file)
|
2017-06-28 15:11:05 +00:00
|
|
|
{
|
2023-04-18 21:27:50 +00:00
|
|
|
struct instruction *insn, *prev_insn;
|
|
|
|
struct symbol *call_dest;
|
2023-04-18 21:27:47 +00:00
|
|
|
int warnings = 0;
|
2017-06-28 15:11:07 +00:00
|
|
|
|
|
|
|
if (file->ignore_unreachables)
|
|
|
|
return 0;
|
2017-06-28 15:11:05 +00:00
|
|
|
|
|
|
|
for_each_insn(file, insn) {
|
2020-09-19 06:41:18 +00:00
|
|
|
if (insn->visited || ignore_unreachable_insn(file, insn))
|
2017-06-28 15:11:07 +00:00
|
|
|
continue;
|
|
|
|
|
2023-04-18 21:27:50 +00:00
|
|
|
prev_insn = prev_insn_same_sec(file, insn);
|
|
|
|
if (prev_insn && prev_insn->dead_end) {
|
|
|
|
call_dest = insn_call_dest(prev_insn);
|
2025-03-14 19:28:59 +00:00
|
|
|
if (call_dest) {
|
2025-03-14 19:29:01 +00:00
|
|
|
WARN_INSN(insn, "%s() missing __noreturn in .c/.h or NORETURN() in noreturns.h",
|
2023-04-18 21:27:50 +00:00
|
|
|
call_dest->name);
|
|
|
|
warnings++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-12 19:03:17 +00:00
|
|
|
WARN_INSN(insn, "unreachable instruction");
|
2023-04-18 21:27:47 +00:00
|
|
|
warnings++;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2023-04-18 21:27:47 +00:00
|
|
|
return warnings;
|
2017-06-28 15:11:05 +00:00
|
|
|
}
|
|
|
|
|
2023-04-18 21:27:48 +00:00
|
|
|
/* 'funcs' is a space-separated list of function names */
|
2025-03-24 21:55:59 +00:00
|
|
|
static void disas_funcs(const char *funcs)
|
2023-04-18 21:27:48 +00:00
|
|
|
{
|
|
|
|
const char *objdump_str, *cross_compile;
|
|
|
|
int size, ret;
|
|
|
|
char *cmd;
|
|
|
|
|
|
|
|
cross_compile = getenv("CROSS_COMPILE");
|
2025-04-01 04:26:42 +00:00
|
|
|
if (!cross_compile)
|
|
|
|
cross_compile = "";
|
2023-04-18 21:27:48 +00:00
|
|
|
|
|
|
|
objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '"
|
|
|
|
"BEGIN { split(_funcs, funcs); }"
|
|
|
|
"/^$/ { func_match = 0; }"
|
|
|
|
"/<.*>:/ { "
|
|
|
|
"f = gensub(/.*<(.*)>:/, \"\\\\1\", 1);"
|
|
|
|
"for (i in funcs) {"
|
|
|
|
"if (funcs[i] == f) {"
|
|
|
|
"func_match = 1;"
|
|
|
|
"base = strtonum(\"0x\" $1);"
|
|
|
|
"break;"
|
|
|
|
"}"
|
|
|
|
"}"
|
|
|
|
"}"
|
|
|
|
"{"
|
|
|
|
"if (func_match) {"
|
|
|
|
"addr = strtonum(\"0x\" $1);"
|
|
|
|
"printf(\"%%04x \", addr - base);"
|
|
|
|
"print;"
|
|
|
|
"}"
|
|
|
|
"}' 1>&2";
|
|
|
|
|
|
|
|
/* fake snprintf() to calculate the size */
|
|
|
|
size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1;
|
|
|
|
if (size <= 0) {
|
|
|
|
WARN("objdump string size calculation failed");
|
2025-03-24 21:55:59 +00:00
|
|
|
return;
|
2023-04-18 21:27:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cmd = malloc(size);
|
|
|
|
|
|
|
|
/* real snprintf() */
|
|
|
|
snprintf(cmd, size, objdump_str, cross_compile, objname, funcs);
|
|
|
|
ret = system(cmd);
|
|
|
|
if (ret) {
|
|
|
|
WARN("disassembly failed: %d", ret);
|
2025-03-24 21:55:59 +00:00
|
|
|
return;
|
2023-04-18 21:27:48 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-03-24 21:55:59 +00:00
|
|
|
static void disas_warned_funcs(struct objtool_file *file)
|
2023-04-18 21:27:48 +00:00
|
|
|
{
|
|
|
|
struct symbol *sym;
|
|
|
|
char *funcs = NULL, *tmp;
|
|
|
|
|
|
|
|
for_each_sym(file, sym) {
|
2025-04-01 04:26:39 +00:00
|
|
|
if (sym->warned) {
|
2023-04-18 21:27:48 +00:00
|
|
|
if (!funcs) {
|
|
|
|
funcs = malloc(strlen(sym->name) + 1);
|
2025-03-24 21:55:59 +00:00
|
|
|
if (!funcs) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("malloc");
|
2025-03-24 21:55:59 +00:00
|
|
|
return;
|
|
|
|
}
|
2023-04-18 21:27:48 +00:00
|
|
|
strcpy(funcs, sym->name);
|
|
|
|
} else {
|
|
|
|
tmp = malloc(strlen(funcs) + strlen(sym->name) + 2);
|
2025-03-24 21:55:59 +00:00
|
|
|
if (!tmp) {
|
2025-04-01 04:26:41 +00:00
|
|
|
ERROR_GLIBC("malloc");
|
2025-03-24 21:55:59 +00:00
|
|
|
return;
|
|
|
|
}
|
2023-04-18 21:27:48 +00:00
|
|
|
sprintf(tmp, "%s %s", funcs, sym->name);
|
|
|
|
free(funcs);
|
|
|
|
funcs = tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (funcs)
|
|
|
|
disas_funcs(funcs);
|
|
|
|
}
|
|
|
|
|
2023-05-30 17:21:13 +00:00
|
|
|
struct insn_chunk {
|
|
|
|
void *addr;
|
|
|
|
struct insn_chunk *next;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reduce peak RSS usage by freeing insns memory before writing the ELF file,
|
|
|
|
* which can trigger more allocations for .debug_* sections whose data hasn't
|
|
|
|
* been read yet.
|
|
|
|
*/
|
|
|
|
static void free_insns(struct objtool_file *file)
|
|
|
|
{
|
|
|
|
struct instruction *insn;
|
|
|
|
struct insn_chunk *chunks = NULL, *chunk;
|
|
|
|
|
|
|
|
for_each_insn(file, insn) {
|
|
|
|
if (!insn->idx) {
|
|
|
|
chunk = malloc(sizeof(*chunk));
|
|
|
|
chunk->addr = insn;
|
|
|
|
chunk->next = chunks;
|
|
|
|
chunks = chunk;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (chunk = chunks; chunk; chunk = chunk->next)
|
|
|
|
free(chunk->addr);
|
|
|
|
}
|
|
|
|
|
2020-08-25 12:47:40 +00:00
|
|
|
/*
 * Main objtool entry point for a single object file: set up CFI state,
 * decode the file, run the enabled validation passes (accumulating warnings),
 * then generate the requested annotation/metadata sections.
 *
 * Returns 0 on full success, a negative/non-zero value on hard error, and
 * (with --werror) 1 when warnings were upgraded to errors.
 *
 * NOTE(review): the pass ordering below is significant — validation before
 * section creation, create_*_sections() before orc_create(), and
 * free_insns() only after all insn users have run.
 */
int check(struct objtool_file *file)
{
	int ret = 0, warnings = 0;

	/* Establish the architecture's baseline CFI states. */
	arch_initial_func_cfi_state(&initial_func_cfi);
	init_cfi_state(&init_cfi);
	init_cfi_state(&func_cfi);
	set_func_state(&func_cfi);
	init_cfi_state(&force_undefined_cfi);
	force_undefined_cfi.force_undefined = true;

	/*
	 * Size the CFI hash relative to the symbol count; failure here is a
	 * hard error, not a warning.
	 */
	if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) {
		ret = -1;
		goto out;
	}

	cfi_hash_add(&init_cfi);
	cfi_hash_add(&func_cfi);

	ret = decode_sections(file);
	if (ret)
		goto out;

	/* Nothing to validate or annotate in an insn-free object. */
	if (!nr_insns)
		goto out;

	/* --- validation passes: each returns a warning count --- */

	if (opts.retpoline)
		warnings += validate_retpoline(file);

	if (opts.stackval || opts.orc || opts.uaccess) {
		int w = 0;

		w += validate_functions(file);
		w += validate_unwind_hints(file, NULL);
		/*
		 * Reachability results are only meaningful if the earlier
		 * passes were clean, so skip it when w != 0.
		 */
		if (!w)
			w += validate_reachable_instructions(file);

		warnings += w;

	} else if (opts.noinstr) {
		warnings += validate_noinstr_sections(file);
	}

	if (opts.unret) {
		/*
		 * Must be after validate_branch() and friends, it plays
		 * further games with insn->visited.
		 */
		warnings += validate_unrets(file);
	}

	if (opts.ibt)
		warnings += validate_ibt(file);

	if (opts.sls)
		warnings += validate_sls(file);

	/* --- section generation: each returns an error code --- */

	if (opts.static_call) {
		ret = create_static_call_sections(file);
		if (ret)
			goto out;
	}

	if (opts.retpoline) {
		ret = create_retpoline_sites_sections(file);
		if (ret)
			goto out;
	}

	if (opts.cfi) {
		ret = create_cfi_sections(file);
		if (ret)
			goto out;
	}

	if (opts.rethunk) {
		ret = create_return_sites_sections(file);
		if (ret)
			goto out;

		/* Skylake call-depth hack piggybacks on rethunk handling. */
		if (opts.hack_skylake) {
			ret = create_direct_call_sections(file);
			if (ret)
				goto out;
		}
	}

	if (opts.mcount) {
		ret = create_mcount_loc_sections(file);
		if (ret)
			goto out;
	}

	if (opts.prefix) {
		ret = add_prefix_symbols(file);
		if (ret)
			goto out;
	}

	if (opts.ibt) {
		ret = create_ibt_endbr_seal_sections(file);
		if (ret)
			goto out;
	}

	if (opts.orc && nr_insns) {
		ret = orc_create(file);
		if (ret)
			goto out;
	}

	/*
	 * Reduce peak RSS: drop the insn arrays before ELF write-back, which
	 * may itself allocate for not-yet-read .debug_* section data.
	 */
	free_insns(file);

	if (opts.stats) {
		printf("nr_insns_visited: %ld\n", nr_insns_visited);
		printf("nr_cfi: %ld\n", nr_cfi);
		printf("nr_cfi_reused: %ld\n", nr_cfi_reused);
		printf("nr_cfi_cache: %ld\n", nr_cfi_cache);
	}

out:
	/* Fast path: clean run. */
	if (!ret && !warnings)
		return 0;

	/* --werror: promote a warnings-only run to an error exit. */
	if (opts.werror && warnings)
		ret = 1;

	if (opts.verbose) {
		if (opts.werror && warnings)
			WARN("%d warning(s) upgraded to errors", warnings);
		print_args();
		disas_warned_funcs(file);
	}

	return ret;
}
|