perf: Rewrite core context handling
Bugzilla: https://bugzilla.redhat.com/2177180

upstream
========
commit bd27568117664b8b3e259721393df420ed51f57b
Author: Peter Zijlstra <peterz@infradead.org>
Date:   Sat Oct 8 11:54:24 2022 +0530

description
===========
There have been various issues and limitations with the way perf uses
(task) contexts to track events. Most notable is the single hardware
PMU task context, which has resulted in a number of yucky things (both
proposed and merged).

Notably:
 - HW breakpoint PMU
 - ARM big.little PMU / Intel ADL PMU
 - Intel Branch Monitoring PMU
 - AMD IBS PMU
 - S390 cpum_cf PMU
 - PowerPC trace_imc PMU

*Current design:*

Currently we have a per task and per cpu perf_event_contexts:

    task_struct::perf_events_ctxp[] <-> perf_event_context <-> perf_cpu_context
         ^                                 |    ^     |     ^
         `---------------------------------'    |     `--> pmu ---'
                                                v           ^
                                           perf_event ------'

Each task has an array of pointers to a perf_event_context. Each
perf_event_context has a direct relation to a PMU and a group of events
for that PMU. The task related perf_event_context's have a pointer back
to that task.

Each PMU has a per-cpu pointer to a per-cpu perf_cpu_context, which
includes a perf_event_context, which again has a direct relation to that
PMU, and a group of events for that PMU.

The perf_cpu_context also tracks which task context is currently
associated with that CPU and includes a few other things like the
hrtimer for rotation etc.

Each perf_event is then associated with its PMU and one
perf_event_context.

*Proposed design:*

The new design proposed by this patch reduces this to a single task
context and a single CPU context, but adds some intermediate
data structures:

    task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context
         ^                          |   ^        ^
         `--------------------------'   |        |
                                        |        |    perf_cpu_pmu_context <--.
                                        |        `----.    ^                  |
                                        |              |   |                  |
                                        |              v   v                  |
                                        |    ,--> perf_event_pmu_context      |
                                        |    |                                |
                                        |    |                                |
                                        v    v                                |
                                   perf_event ---> pmu ----------------'

With the new design, perf_event_context will hold all events for all
pmus in the (respective pinned/flexible) rbtrees. This can be achieved
by adding pmu to the rbtree key:

    {cpu, pmu, cgroup, group_index}

Each perf_event_context carries a list of perf_event_pmu_context which
is used to hold per-pmu-per-context state. For example, it keeps track
of currently active events for that pmu, a pmu specific task_ctx_data,
a flag to tell whether rotation is required or not etc.

Additionally, perf_cpu_pmu_context is used to hold per-pmu-per-cpu state
like hrtimer details to drive the event rotation, a pointer to the
perf_event_pmu_context of the currently running task and some other
ancillary information.

Each perf_event is associated with its pmu, perf_event_context and
perf_event_pmu_context.

Further optimizations to the current implementation are possible. For
example, ctx_resched() can be optimized to reschedule only single pmu
events.

Much thanks to Ravi for picking this up and pushing it towards
completion.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Co-developed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221008062424.313-1-ravi.bangoria@amd.com

Conflicts:
==========
A strange situation upstream -- both of the following patches appear to
have been applied against a revision that does not contain the other
one, which is impossible:
- bd2756811766 ("perf: Rewrite core context handling")
- 517e6a301f34 ("perf: Fix perf_pending_task() UaF")
No matter in which order I apply them, they always conflict because
each one does not expect the other to be there. Fortunately, the
conflict is easy to resolve.

Signed-off-by: Michael Petlan <mpetlan@redhat.com>
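To make the new rbtree ordering concrete, here is a small, self-contained sketch of a
{cpu, pmu, cgroup, group_index} comparison. It is illustrative only -- the struct name,
field types and helper (group_key, group_key_cmp) are simplified stand-ins invented for
this example; the kernel's actual comparator works on struct perf_event fields in
kernel/events/core.c:

```c
/* Illustrative only: a simplified stand-in for the event-group key described
 * in the commit message ({cpu, pmu, cgroup, group_index}). Not kernel code.
 */
#include <stdint.h>

struct group_key {
	int		cpu;
	uintptr_t	pmu;		/* address of the owning struct pmu */
	uint64_t	cgroup_id;	/* 0 when the event is not a cgroup event */
	uint64_t	group_index;	/* insertion order, breaks ties */
};

/* Lexicographic compare over the key fields; returns <0, 0 or >0 like memcmp(). */
static int group_key_cmp(const struct group_key *a, const struct group_key *b)
{
	if (a->cpu != b->cpu)
		return a->cpu < b->cpu ? -1 : 1;
	if (a->pmu != b->pmu)		/* new in this design: pmu is part of the key */
		return a->pmu < b->pmu ? -1 : 1;
	if (a->cgroup_id != b->cgroup_id)
		return a->cgroup_id < b->cgroup_id ? -1 : 1;
	if (a->group_index != b->group_index)
		return a->group_index < b->group_index ? -1 : 1;
	return 0;
}
```

Because the pmu sorts ahead of group_index, all events of one pmu on a given CPU form a
contiguous run in the pinned/flexible trees, which is what allows a single
perf_event_context to serve every pmu while still permitting a single pmu's events to be
(re)scheduled on their own.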
@@ -806,10 +806,14 @@ static void armv8pmu_disable_event(struct perf_event *event)
 static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 {
-struct perf_event_context *task_ctx =
-this_cpu_ptr(cpu_pmu->pmu.pmu_cpu_context)->task_ctx;
+struct perf_event_context *ctx;
+int nr_user = 0;

-if (sysctl_perf_user_access && task_ctx && task_ctx->nr_user)
+ctx = perf_cpu_task_ctx();
+if (ctx)
+nr_user = ctx->nr_user;
+
+if (sysctl_perf_user_access && nr_user)
 armv8pmu_enable_user_access(cpu_pmu);
 else
 armv8pmu_disable_user_access();
@@ -1019,10 +1023,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 return 0;
 }

-static int armv8pmu_filter_match(struct perf_event *event)
+static bool armv8pmu_filter(struct pmu *pmu, int cpu)
 {
-unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
-return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
+struct arm_pmu *armpmu = to_arm_pmu(pmu);
+return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus);
 }

 static void armv8pmu_reset(void *info)
@@ -1253,7 +1257,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 cpu_pmu->stop = armv8pmu_stop;
 cpu_pmu->reset = armv8pmu_reset;
 cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
-cpu_pmu->filter_match = armv8pmu_filter_match;
+cpu_pmu->filter = armv8pmu_filter;

 cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;

@@ -131,7 +131,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)

 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
 static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
@@ -414,7 +414,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
 cpuhw->bhrb_context = event->ctx;
 }
 cpuhw->bhrb_users++;
-perf_sched_cb_inc(event->ctx->pmu);
+perf_sched_cb_inc(event->pmu);
 }

 static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -426,7 +426,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)

 WARN_ON_ONCE(!cpuhw->bhrb_users);
 cpuhw->bhrb_users--;
-perf_sched_cb_dec(event->ctx->pmu);
+perf_sched_cb_dec(event->pmu);

 if (!cpuhw->disabled && !cpuhw->bhrb_users) {
 /* BHRB cannot be turned off when other
@@ -441,7 +441,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 /* Called from ctxsw to prevent one process's branch entries to
  * mingle with the other process's entries during context switch.
  */
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 if (!ppmu->bhrb_nr)
 return;

@@ -379,7 +379,7 @@ static int paicrypt_push_sample(void)
 /* Called on schedule-in and schedule-out. No access to event structure,
  * but for sampling only event CRYPTO_ALL is allowed.
  */
-static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 /* We started with a clean page on event installation. So read out
  * results on schedule_out and if page was dirty, clear values.

@@ -471,7 +471,7 @@ static int paiext_push_sample(void)
 /* Called on schedule-in and schedule-out. No access to event structure,
  * but for sampling only event NNPA_ALL is allowed.
  */
-static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 /* We started with a clean page on event installation. So read out
  * results on schedule_out and if page was dirty, clear values.

@@ -384,7 +384,7 @@ static void amd_brs_poison_buffer(void)
  * On ctxswin, sched_in = true, called after the PMU has started
  * On ctxswout, sched_in = false, called before the PMU is stopped
  */
-void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

@@ -352,7 +352,7 @@ void amd_pmu_lbr_add(struct perf_event *event)
 cpuc->br_sel = reg->reg;
 }

-perf_sched_cb_inc(event->ctx->pmu);
+perf_sched_cb_inc(event->pmu);

 if (!cpuc->lbr_users++ && !event->total_time_running)
 amd_pmu_lbr_reset();
@@ -370,10 +370,10 @@ void amd_pmu_lbr_del(struct perf_event *event)

 cpuc->lbr_users--;
 WARN_ON_ONCE(cpuc->lbr_users < 0);
-perf_sched_cb_dec(event->ctx->pmu);
+perf_sched_cb_dec(event->pmu);
 }

-void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

@@ -90,6 +90,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
 DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
 DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);

+DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
+
 /*
  * This one is magic, it will get called even when PMU init fails (because
  * there is no PMU), in which case it should simply return NULL.
@@ -2031,6 +2033,7 @@ static void x86_pmu_static_call_update(void)
 static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);

 static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
+static_call_update(x86_pmu_filter, x86_pmu.filter);
 }

 static void _x86_pmu_read(struct perf_event *event)
@@ -2052,23 +2055,6 @@ void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
 pr_info("... event mask: %016Lx\n", intel_ctrl);
 }

-/*
- * The generic code is not hybrid friendly. The hybrid_pmu->pmu
- * of the first registered PMU is unconditionally assigned to
- * each possible cpuctx->ctx.pmu.
- * Update the correct hybrid PMU to the cpuctx->ctx.pmu.
- */
-void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu)
-{
-struct perf_cpu_context *cpuctx;
-
-if (!pmu->pmu_cpu_context)
-return;
-
-cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
-cpuctx->ctx.pmu = pmu;
-}
-
 static int __init init_hw_perf_events(void)
 {
 struct x86_pmu_quirk *quirk;
@@ -2195,9 +2181,6 @@ static int __init init_hw_perf_events(void)
 (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
 if (err)
 break;
-
-if (cpu_type == hybrid_pmu->cpu_type)
-x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id());
 }

 if (i < x86_pmu.num_hybrid_pmus) {
@@ -2646,15 +2629,15 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 NULL,
 };

-static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
-static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
+static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
 }

-static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
-struct perf_event_context *next)
+static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+struct perf_event_pmu_context *next_epc)
 {
-static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
+static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
 }

 void perf_check_microcode(void)
@@ -2689,12 +2672,13 @@ static int x86_pmu_aux_output_match(struct perf_event *event)
 return 0;
 }

-static int x86_pmu_filter_match(struct perf_event *event)
+static bool x86_pmu_filter(struct pmu *pmu, int cpu)
 {
-if (x86_pmu.filter_match)
-return x86_pmu.filter_match(event);
+bool ret = false;

-return 1;
+static_call_cond(x86_pmu_filter)(pmu, cpu, &ret);
+
+return ret;
 }

 static struct pmu pmu = {
@@ -2725,7 +2709,7 @@ static struct pmu pmu = {

 .aux_output_match = x86_pmu_aux_output_match,

-.filter_match = x86_pmu_filter_match,
+.filter = x86_pmu_filter,
 };

 void arch_perf_update_userpage(struct perf_event *event,

@@ -4536,8 +4536,6 @@ end:
 cpumask_set_cpu(cpu, &pmu->supported_cpus);
 cpuc->pmu = &pmu->pmu;
-
-x86_pmu_update_cpu_context(&pmu->pmu, cpu);

 return true;
 }
@@ -4671,17 +4669,17 @@ static void intel_pmu_cpu_dead(int cpu)
 cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus);
 }

-static void intel_pmu_sched_task(struct perf_event_context *ctx,
+static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
 bool sched_in)
 {
-intel_pmu_pebs_sched_task(ctx, sched_in);
-intel_pmu_lbr_sched_task(ctx, sched_in);
+intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
+intel_pmu_lbr_sched_task(pmu_ctx, sched_in);
 }

-static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
-struct perf_event_context *next)
+static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+struct perf_event_pmu_context *next_epc)
 {
-intel_pmu_lbr_swap_task_ctx(prev, next);
+intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc);
 }

 static int intel_pmu_check_period(struct perf_event *event, u64 value)
@@ -4705,12 +4703,11 @@ static int intel_pmu_aux_output_match(struct perf_event *event)
 return is_intel_pt_event(event);
 }

-static int intel_pmu_filter_match(struct perf_event *event)
+static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret)
 {
-struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
-unsigned int cpu = smp_processor_id();
+struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu);

-return cpumask_test_cpu(cpu, &pmu->supported_cpus);
+*ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus);
 }

 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -6413,7 +6410,7 @@ __init int intel_pmu_init(void)
 static_call_update(intel_pmu_set_topdown_event_period,
 &adl_set_topdown_event_period);

-x86_pmu.filter_match = intel_pmu_filter_match;
+x86_pmu.filter = intel_pmu_filter;
 x86_pmu.get_event_constraints = adl_get_event_constraints;
 x86_pmu.hw_config = adl_hw_config;
 x86_pmu.limit_period = spr_limit_period;

@@ -1069,7 +1069,7 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }

-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

@@ -1177,7 +1177,7 @@ static void
 pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
 struct perf_event *event, bool add)
 {
-struct pmu *pmu = event->ctx->pmu;
+struct pmu *pmu = event->pmu;
 /*
  * Make sure we get updated with the first PEBS
  * event. It will trigger also during removal, but

@@ -515,21 +515,21 @@ static void __intel_pmu_lbr_save(void *ctx)
 cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
 }

-void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
-struct perf_event_context *next)
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+struct perf_event_pmu_context *next_epc)
 {
 void *prev_ctx_data, *next_ctx_data;

-swap(prev->task_ctx_data, next->task_ctx_data);
+swap(prev_epc->task_ctx_data, next_epc->task_ctx_data);

 /*
- * Architecture specific synchronization makes sense in
- * case both prev->task_ctx_data and next->task_ctx_data
+ * Architecture specific synchronization makes sense in case
+ * both prev_epc->task_ctx_data and next_epc->task_ctx_data
  * pointers are allocated.
  */

-prev_ctx_data = next->task_ctx_data;
-next_ctx_data = prev->task_ctx_data;
+prev_ctx_data = next_epc->task_ctx_data;
+next_ctx_data = prev_epc->task_ctx_data;

 if (!prev_ctx_data || !next_ctx_data)
 return;
@@ -538,7 +538,7 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
 task_context_opt(next_ctx_data)->lbr_callstack_users);
 }

-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 void *task_ctx;
@@ -551,7 +551,7 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
  * the task was scheduled out, restore the stack. Otherwise flush
  * the LBR stack.
  */
-task_ctx = ctx ? ctx->task_ctx_data : NULL;
+task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
 if (task_ctx) {
 if (sched_in)
 __intel_pmu_lbr_restore(task_ctx);
@@ -587,8 +587,8 @@ void intel_pmu_lbr_add(struct perf_event *event)

 cpuc->br_sel = event->hw.branch_reg.reg;

-if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
-task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
+if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data)
+task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++;

 /*
  * Request pmu::sched_task() callback, which will fire inside the
@@ -611,7 +611,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
  */
 if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
 cpuc->lbr_pebs_users++;
-perf_sched_cb_inc(event->ctx->pmu);
+perf_sched_cb_inc(event->pmu);
 if (!cpuc->lbr_users++ && !event->total_time_running)
 intel_pmu_lbr_reset();
 }
@@ -664,8 +664,8 @@ void intel_pmu_lbr_del(struct perf_event *event)
 return;

 if (branch_user_callstack(cpuc->br_sel) &&
-event->ctx->task_ctx_data)
-task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
+event->pmu_ctx->task_ctx_data)
+task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;

 if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
 cpuc->lbr_select = 0;
@@ -675,7 +675,7 @@ void intel_pmu_lbr_del(struct perf_event *event)
 cpuc->lbr_users--;
 WARN_ON_ONCE(cpuc->lbr_users < 0);
 WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
-perf_sched_cb_dec(event->ctx->pmu);
+perf_sched_cb_dec(event->pmu);
 }

 static inline bool vlbr_exclude_host(void)

@@ -811,7 +811,7 @@ struct x86_pmu {
 void (*cpu_dead)(int cpu);

 void (*check_microcode)(void);
-void (*sched_task)(struct perf_event_context *ctx,
+void (*sched_task)(struct perf_event_pmu_context *pmu_ctx,
 bool sched_in);

 /*
@@ -894,12 +894,12 @@ struct x86_pmu {
 int num_topdown_events;

 /*
- * perf task context (i.e. struct perf_event_context::task_ctx_data)
+ * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data)
  * switch helper to bridge calls from perf/core to perf/x86.
  * See struct pmu::swap_task_ctx() usage for examples;
  */
-void (*swap_task_ctx)(struct perf_event_context *prev,
-struct perf_event_context *next);
+void (*swap_task_ctx)(struct perf_event_pmu_context *prev_epc,
+struct perf_event_pmu_context *next_epc);

 /*
  * AMD bits
@@ -925,7 +925,7 @@ struct x86_pmu {

 int (*aux_output_match) (struct perf_event *event);

-int (*filter_match)(struct perf_event *event);
+void (*filter)(struct pmu *pmu, int cpu, bool *ret);
 /*
  * Hybrid support
  *
@@ -1180,8 +1180,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs);
 void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
 u64 intel_ctrl);

-void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu);
-
 extern struct event_constraint emptyconstraint;

 extern struct event_constraint unconstrained;
@@ -1306,7 +1304,7 @@ void amd_pmu_lbr_reset(void);
 void amd_pmu_lbr_read(void);
 void amd_pmu_lbr_add(struct perf_event *event);
 void amd_pmu_lbr_del(struct perf_event *event);
-void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
 void amd_pmu_lbr_enable_all(void);
 void amd_pmu_lbr_disable_all(void);
 int amd_pmu_lbr_hw_config(struct perf_event *event);
@@ -1330,7 +1328,7 @@ static inline void amd_pmu_brs_add(struct perf_event *event)
 {
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

-perf_sched_cb_inc(event->ctx->pmu);
+perf_sched_cb_inc(event->pmu);
 cpuc->lbr_users++;
 /*
  * No need to reset BRS because it is reset
@@ -1345,10 +1343,10 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
 cpuc->lbr_users--;
 WARN_ON_ONCE(cpuc->lbr_users < 0);

-perf_sched_cb_dec(event->ctx->pmu);
+perf_sched_cb_dec(event->pmu);
 }

-void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
 #else
 static inline int amd_brs_init(void)
 {
@@ -1373,7 +1371,7 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
 {
 }

-static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 }

@@ -1533,7 +1531,7 @@ void intel_pmu_pebs_enable_all(void);

 void intel_pmu_pebs_disable_all(void);

-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);

 void intel_pmu_auto_reload_read(struct perf_event *event);

@@ -1541,10 +1539,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);

 void intel_ds_init(void);

-void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
-struct perf_event_context *next);
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+struct perf_event_pmu_context *next_epc);

-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);

 u64 lbr_from_signext_quirk_wr(u64 val);

@@ -552,15 +552,14 @@ static void armpmu_disable(struct pmu *pmu)
  * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
  * the same microarchitecture.
  */
-static int armpmu_filter_match(struct perf_event *event)
+static bool armpmu_filter(struct pmu *pmu, int cpu)
 {
-struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-unsigned int cpu = smp_processor_id();
-int ret;
+struct arm_pmu *armpmu = to_arm_pmu(pmu);
+bool ret;

 ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
-if (ret && armpmu->filter_match)
-return armpmu->filter_match(event);
+if (ret && armpmu->filter)
+return armpmu->filter(pmu, cpu);

 return ret;
 }
@@ -887,14 +886,13 @@ struct arm_pmu *armpmu_alloc(void)
 .start = armpmu_start,
 .stop = armpmu_stop,
 .read = armpmu_read,
-.filter_match = armpmu_filter_match,
+.filter = armpmu_filter,
 .attr_groups = pmu->attr_groups,
 /*
  * This is a CPU PMU potentially in a heterogeneous
  * configuration (e.g. big.LITTLE). This is not an uncore PMU,
  * and we have taken ctx sharing into account (e.g. with our
- * pmu::filter_match callback and pmu::event_init group
- * validation).
+ * pmu::filter callback and pmu::event_init group validation).
  */
 .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
 };

@@ -100,7 +100,7 @@ struct arm_pmu {
 void (*stop)(struct arm_pmu *);
 void (*reset)(void *);
 int (*map_event)(struct perf_event *event);
-int (*filter_match)(struct perf_event *event);
+bool (*filter)(struct pmu *pmu, int cpu);
 int num_events;
 bool secure_access; /* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40

@@ -266,6 +266,7 @@ struct hw_perf_event {
 };

 struct perf_event;
+struct perf_event_pmu_context;

 /*
  * Common implementation detail of pmu::{start,commit,cancel}_txn
@@ -308,7 +309,7 @@ struct pmu {
 int capabilities;

 int __percpu *pmu_disable_count;
-struct perf_cpu_context __percpu *pmu_cpu_context;
+struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
 atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
 int task_ctx_nr;
 int hrtimer_interval_ms;
@@ -443,7 +444,7 @@ struct pmu {
 /*
  * context-switches callback
  */
-void (*sched_task) (struct perf_event_context *ctx,
+void (*sched_task) (struct perf_event_pmu_context *pmu_ctx,
 bool sched_in);

 /*
@@ -457,8 +458,8 @@ struct pmu {
  * implementation and Perf core context switch handling callbacks for usage
  * examples.
  */
-void (*swap_task_ctx) (struct perf_event_context *prev,
-struct perf_event_context *next);
+void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc,
+struct perf_event_pmu_context *next_epc);
 /* optional */

 /*
@@ -522,9 +523,10 @@ struct pmu {
 /* optional */

 /*
- * Filter events for PMU-specific reasons.
+ * Skip programming this PMU on the given CPU. Typically needed for
+ * big.LITTLE things.
  */
-int (*filter_match) (struct perf_event *event); /* optional */
+bool (*filter) (struct pmu *pmu, int cpu); /* optional */

 /*
  * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
@@ -695,6 +697,11 @@ struct perf_event {
 int group_caps;

 struct perf_event *group_leader;
+/*
+ * event->pmu will always point to pmu in which this event belongs.
+ * Whereas event->pmu_ctx->pmu may point to other pmu when group of
+ * different pmu events is created.
+ */
 struct pmu *pmu;
 void *pmu_private;

@@ -720,6 +727,12 @@ struct perf_event {
 struct hw_perf_event hw;

 struct perf_event_context *ctx;
+/*
+ * event->pmu_ctx points to perf_event_pmu_context in which the event
+ * is added. This pmu_ctx can be of other pmu for sw event when that
+ * sw event is part of a group which also contains non-sw events.
+ */
+struct perf_event_pmu_context *pmu_ctx;
 atomic_long_t refcount;

 /*
@@ -812,19 +825,69 @@ struct perf_event {
 #endif /* CONFIG_PERF_EVENTS */
 };

+/*
+ *           ,-----------------------[1:n]----------------------.
+ *           V                                                   V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
+ *           ^                      ^        |                   |
+ *           `--------[1:n]---------'        `-[n:1]-> pmu <-[1:n]-'
+ *
+ *
+ * struct perf_event_pmu_context lifetime is refcount based and RCU freed
+ * (similar to perf_event_context). Locking is as if it were a member of
+ * perf_event_context; specifically:
+ *
+ *   modification, both: ctx->mutex && ctx->lock
+ *   reading, either:    ctx->mutex || ctx->lock
+ *
+ * There is one exception to this; namely put_pmu_ctx() isn't always called
+ * with ctx->mutex held; this means that as long as we can guarantee the epc
+ * has events the above rules hold.
+ *
+ * Specificially, sys_perf_event_open()'s group_leader case depends on
+ * ctx->mutex pinning the configuration. Since we hold a reference on
+ * group_leader (through the filedesc) it can't go away, therefore it's
+ * associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ */
+struct perf_event_pmu_context {
+struct pmu *pmu;
+struct perf_event_context *ctx;
+
+struct list_head pmu_ctx_entry;
+
+struct list_head pinned_active;
+struct list_head flexible_active;
+
+/* Used to avoid freeing per-cpu perf_event_pmu_context */
+unsigned int embedded : 1;
+
+unsigned int nr_events;
+
+atomic_t refcount; /* event <-> epc */
+struct rcu_head rcu_head;
+
+void *task_ctx_data; /* pmu specific data */
+/*
+ * Set when one or more (plausibly active) event can't be scheduled
+ * due to pmu overcommit or pmu constraints, except tolerant to
+ * events not necessary to be active due to scheduling constraints,
+ * such as cgroups.
+ */
+int rotate_necessary;
+};
+
 struct perf_event_groups {
 struct rb_root tree;
 u64 index;
 };

 /**
  * struct perf_event_context - event context structure
  *
  * Used as a container for task events and CPU events as well:
  */
 struct perf_event_context {
-struct pmu *pmu;
 /*
  * Protect the states of the events in the list,
  * nr_active, and the list:
@@ -837,27 +900,21 @@ struct perf_event_context {
  */
 struct mutex mutex;

-struct list_head active_ctx_list;
+struct list_head pmu_ctx_list;
 struct perf_event_groups pinned_groups;
 struct perf_event_groups flexible_groups;
 struct list_head event_list;

-struct list_head pinned_active;
-struct list_head flexible_active;
-
 int nr_events;
-int nr_active;
 int nr_user;
 int is_active;
+
+int nr_task_data;
 int nr_stat;
 int nr_freq;
 int rotate_disable;
-/*
- * Set when nr_events != nr_active, except tolerant to events not
- * necessary to be active due to scheduling constraints, such as cgroups.
- */
-int rotate_necessary;
-refcount_t refcount;
+
+refcount_t refcount; /* event <-> ctx */
 struct task_struct *task;

 /*
@@ -878,7 +935,6 @@ struct perf_event_context {
 #ifdef CONFIG_CGROUP_PERF
 int nr_cgroups; /* cgroup evts */
 #endif
-void *task_ctx_data; /* pmu specific data */
 struct rcu_head rcu_head;

 /*
@@ -896,12 +952,13 @@ struct perf_event_context {
  */
 #define PERF_NR_CONTEXTS 4

-/**
- * struct perf_cpu_context - per cpu event context structure
- */
-struct perf_cpu_context {
-struct perf_event_context ctx;
-struct perf_event_context *task_ctx;
+struct perf_cpu_pmu_context {
+struct perf_event_pmu_context epc;
+struct perf_event_pmu_context *task_epc;
+
+struct list_head sched_cb_entry;
+int sched_cb_usage;
+
 int active_oncpu;
 int exclusive;
@@ -909,16 +966,20 @@ struct perf_cpu_context {
 struct hrtimer hrtimer;
 ktime_t hrtimer_interval;
 unsigned int hrtimer_active;
+};
+
+/**
+ * struct perf_event_cpu_context - per cpu event context structure
+ */
+struct perf_cpu_context {
+struct perf_event_context ctx;
+struct perf_event_context *task_ctx;
+int online;

 #ifdef CONFIG_CGROUP_PERF
 struct perf_cgroup *cgrp;
-struct list_head cgrp_cpuctx_entry;
 #endif

-struct list_head sched_cb_entry;
-int sched_cb_usage;
-
-int online;
 /*
  * Per-CPU storage for iterators used in visit_groups_merge. The default
  * storage is of size 2 to hold the CPU and any CPU event iterators.
@@ -982,6 +1043,8 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)

 #ifdef CONFIG_PERF_EVENTS

+extern struct perf_event_context *perf_cpu_task_ctx(void);
+
 extern void *perf_aux_output_begin(struct perf_output_handle *handle,
 struct perf_event *event);
 extern void perf_aux_output_end(struct perf_output_handle *handle,
@@ -1187,7 +1250,7 @@ static inline int is_software_event(struct perf_event *event)
  */
 static inline int in_software_context(struct perf_event *event)
 {
-return event->ctx->pmu->task_ctx_nr == perf_sw_context;
+return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
 }

 static inline int is_exclusive_pmu(struct pmu *pmu)

@@ -1245,7 +1245,7 @@ struct task_struct {
 unsigned int futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
-struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
+struct perf_event_context *perf_event_ctxp;
 struct mutex perf_event_mutex;
 struct list_head perf_event_list;
 #endif
kernel/events/core.c: 1996 changed lines -- this file's diff was suppressed by the web viewer as too large and is not reproduced here.