Miscellaneous perf events fixes and a minor HW enablement change:

 - Fix missing RCU protection in perf_iterate_ctx()
 - Fix pmu_ctx_list ordering bug
 - Reject the zero page in uprobes
 - Fix a family of bugs related to low frequency sampling
 - Add Intel Arrow Lake U CPUs to the generic Arrow Lake RAPL support table
 - Fix a lockdep-assert false positive in uretprobes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-----BEGIN PGP SIGNATURE-----

iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmfCCxQRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1iT1RAAtG0sbai0gJ2OMEOIAZROKKwkFMfx67Vl
ZrcnPCkXK7mL6WQNErFTjdi7Cv2fsMP2WfMHv92ddgrlQZ02EosMKKro1q8ZEd18
DpJmfujNGDkM0VDHd1Lg4/vPQw1RuEY85kqxRKIr5xtoKFR1sxNNNFwsWWeECbKW
QnfzJsk1nFQUPHcD+FlLeyTnb6MgnLdcPMnXWLC4qVRBTHxCi/TS1XXhFHah31Rv
kiCdEHMVUA1WXrPl+1I0DW/EjugcTWTB6cXat9YBZpsR2ZsVNrfNgdBtjCn0zEuf
U3g8gQ/jm9GaZ1Q0ozTsklZlcH8JtOskYOaYiinN7lh5QWYlI2AWTnl6EZxrIKmV
sw2LCl1BQLQocCr9GC+99Golv3U5FvxvRgTIBTzJs2t2WZtjF5Ceg1gwy12zLTKw
VSGlLQZz55uHsgl3g37oNhNA0q4BbtuINlZWU6hHWjUEEeogVTjbSucv+8zFI+Dk
0tupuNF5xQB55D5KZ2EhCFgmSFWvjq1K9piM0HuHk8yrFYhHWoSPp5rg4XyYFpBC
o3nJfkOL5hEVGJoeV2wo1CTs6SZNgWBNuV+9MyCS/sTDM2Ggj0x8Vl+d/ewVi7iO
WE2Xksp5awRPv/m+a/XIPc+xQMecnOELVj3RrQZ8AzNUSvfKmv01BqjqcOa0wdgW
9EJeG6U2msQ=
=e7a/
-----END PGP SIGNATURE-----

Merge tag 'perf-urgent-2025-02-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf event fixes from Ingo Molnar:
 "Miscellaneous perf events fixes and a minor HW enablement change:

  - Fix missing RCU protection in perf_iterate_ctx()
  - Fix pmu_ctx_list ordering bug
  - Reject the zero page in uprobes
  - Fix a family of bugs related to low frequency sampling
  - Add Intel Arrow Lake U CPUs to the generic Arrow Lake RAPL support table
  - Fix a lockdep-assert false positive in uretprobes"

* tag 'perf-urgent-2025-02-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  uprobes: Remove too strict lockdep_assert() condition in hprobe_expire()
  perf/x86/rapl: Add support for Intel Arrow Lake U
  perf/x86/intel: Use better start period for frequency mode
  perf/core: Fix low freq setting via IOC_PERIOD
  perf/x86: Fix low freqency setting issue
  uprobes: Reject the shared zeropage in uprobe_write_opcode()
  perf/core: Order the PMU list to fix warning about unordered pmu_ctx_list
  perf/core: Add RCU read lock protection to perf_iterate_ctx()
commit 766331f286
arch/x86/events/core.c:

@@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event)
         if (event->attr.type == event->pmu->type)
                 event->hw.config |= x86_pmu_get_event_config(event);

-        if (event->attr.sample_period && x86_pmu.limit_period) {
+        if (!event->attr.freq && x86_pmu.limit_period) {
                 s64 left = event->attr.sample_period;
                 x86_pmu.limit_period(event, &left);
                 if (left > event->attr.sample_period)
arch/x86/events/intel/core.c:

@@ -3952,6 +3952,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
         return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
 }

+static u64 intel_pmu_freq_start_period(struct perf_event *event)
+{
+        int type = event->attr.type;
+        u64 config, factor;
+        s64 start;
+
+        /*
+         * The 127 is the lowest possible recommended SAV (sample after value)
+         * for a 4000 freq (default freq), according to the event list JSON file.
+         * Also, assume the workload is idle 50% time.
+         */
+        factor = 64 * 4000;
+        if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+                goto end;
+
+        /*
+         * The estimation of the start period in the freq mode is
+         * based on the below assumption.
+         *
+         * For a cycles or an instructions event, 1GHZ of the
+         * underlying platform, 1 IPC. The workload is idle 50% time.
+         * The start period = 1,000,000,000 * 1 / freq / 2.
+         *                  = 500,000,000 / freq
+         *
+         * Usually, the branch-related events occur less than the
+         * instructions event. According to the Intel event list JSON
+         * file, the SAV (sample after value) of a branch-related event
+         * is usually 1/4 of an instruction event.
+         * The start period of branch-related events = 125,000,000 / freq.
+         *
+         * The cache-related events occurs even less. The SAV is usually
+         * 1/20 of an instruction event.
+         * The start period of cache-related events = 25,000,000 / freq.
+         */
+        config = event->attr.config & PERF_HW_EVENT_MASK;
+        if (type == PERF_TYPE_HARDWARE) {
+                switch (config) {
+                case PERF_COUNT_HW_CPU_CYCLES:
+                case PERF_COUNT_HW_INSTRUCTIONS:
+                case PERF_COUNT_HW_BUS_CYCLES:
+                case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+                case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+                case PERF_COUNT_HW_REF_CPU_CYCLES:
+                        factor = 500000000;
+                        break;
+                case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+                case PERF_COUNT_HW_BRANCH_MISSES:
+                        factor = 125000000;
+                        break;
+                case PERF_COUNT_HW_CACHE_REFERENCES:
+                case PERF_COUNT_HW_CACHE_MISSES:
+                        factor = 25000000;
+                        break;
+                default:
+                        goto end;
+                }
+        }
+
+        if (type == PERF_TYPE_HW_CACHE)
+                factor = 25000000;
+end:
+        /*
+         * Usually, a prime or a number with less factors (close to prime)
+         * is chosen as an SAV, which makes it less likely that the sampling
+         * period synchronizes with some periodic event in the workload.
+         * Minus 1 to make it at least avoiding values near power of twos
+         * for the default freq.
+         */
+        start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
+
+        if (start > x86_pmu.max_period)
+                start = x86_pmu.max_period;
+
+        if (x86_pmu.limit_period)
+                x86_pmu.limit_period(event, &start);
+
+        return start;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
         int ret = x86_pmu_hw_config(event);

@@ -3963,6 +4042,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
         if (ret)
                 return ret;

+        if (event->attr.freq && event->attr.sample_freq) {
+                event->hw.sample_period = intel_pmu_freq_start_period(event);
+                event->hw.last_period = event->hw.sample_period;
+                local64_set(&event->hw.period_left, event->hw.sample_period);
+        }
+
         if (event->attr.precise_ip) {
                 if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
                         return -EINVAL;
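As a sanity check on the numbers in the comment above: intel_pmu_freq_start_period() only picks a factor and divides by the requested frequency. The snippet below is a minimal userspace sketch (not kernel code) that reproduces just that arithmetic for perf's default 4000 Hz frequency; div_round_up_ull() and start_period() are local stand-ins for DIV_ROUND_UP_ULL() and the factor selection, and the max_period/limit_period clamping is omitted.

/*
 * Userspace sketch of the start-period math from the hunk above.
 * Local helpers only; nothing here is a kernel symbol.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t div_round_up_ull(uint64_t n, uint64_t d)
{
        return (n + d - 1) / d;
}

static uint64_t start_period(uint64_t factor, uint64_t sample_freq)
{
        /* "Minus 1" keeps the result away from powers of two, as in the hunk. */
        return div_round_up_ull(factor, sample_freq) - 1;
}

int main(void)
{
        uint64_t freq = 4000;   /* perf's default -F value */

        /* cycles/instructions: factor 500,000,000 -> 124,999 at 4000 Hz */
        printf("cycles      : %llu\n", (unsigned long long)start_period(500000000ULL, freq));
        /* branch events: factor 125,000,000 -> 31,249 at 4000 Hz */
        printf("branches    : %llu\n", (unsigned long long)start_period(125000000ULL, freq));
        /* cache events: factor 25,000,000 -> 6,249 at 4000 Hz */
        printf("cache       : %llu\n", (unsigned long long)start_period(25000000ULL, freq));
        /* everything else: factor 64 * 4000 -> 63 at 4000 Hz (SAV 127, 50% idle) */
        printf("other events: %llu\n", (unsigned long long)start_period(64ULL * 4000, freq));
        return 0;
}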
arch/x86/events/rapl.c:

@@ -879,6 +879,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
         X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl),
         X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl),
         X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl),
+        X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl),
         X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl),
         {},
 };
kernel/events/core.c:

@@ -4950,7 +4950,7 @@ static struct perf_event_pmu_context *
 find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
                      struct perf_event *event)
 {
-        struct perf_event_pmu_context *new = NULL, *epc;
+        struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc;
         void *task_ctx_data = NULL;

         if (!ctx->task) {

@@ -5007,12 +5007,19 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
                         atomic_inc(&epc->refcount);
                         goto found_epc;
                 }
+                /* Make sure the pmu_ctx_list is sorted by PMU type: */
+                if (!pos && epc->pmu->type > pmu->type)
+                        pos = epc;
         }

         epc = new;
         new = NULL;

-        list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+        if (!pos)
+                list_add_tail(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+        else
+                list_add(&epc->pmu_ctx_entry, pos->pmu_ctx_entry.prev);
+
         epc->ctx = ctx;

 found_epc:
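The ordering fix above hinges on one idiom: while walking the list, remember the first entry whose PMU type is larger than the one being inserted ("pos"), then insert right before it (list_add() at pos->prev), or append at the tail when no such entry exists. Below is a self-contained userspace sketch of that idiom with a minimal list_head re-implementation; the struct and field names are illustrative, not the kernel's, and the real loop's epc->pmu == pmu early exit is left out.

/* Sketch of sorted insertion into a circular doubly linked list. */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

/* Insert 'n' right after 'prev' (same semantics as the kernel's list_add()). */
static void list_add(struct list_head *n, struct list_head *prev)
{
        n->next = prev->next;
        n->prev = prev;
        prev->next->prev = n;
        prev->next = n;
}

static void list_add_tail(struct list_head *n, struct list_head *head)
{
        list_add(n, head->prev);
}

struct epc { int type; struct list_head entry; };

#define list_entry(p) ((struct epc *)((char *)(p) - offsetof(struct epc, entry)))

/* Keep the list sorted by ->type, mirroring the find_get_pmu_context() change. */
static void insert_sorted(struct list_head *head, struct epc *new)
{
        struct list_head *p;
        struct epc *pos = NULL;

        for (p = head->next; p != head; p = p->next) {
                struct epc *cur = list_entry(p);
                if (!pos && cur->type > new->type)
                        pos = cur;
        }
        if (!pos)
                list_add_tail(&new->entry, head);
        else
                list_add(&new->entry, pos->entry.prev);
}

int main(void)
{
        struct list_head head;
        struct epc a = { .type = 1 }, b = { .type = 5 }, c = { .type = 3 };
        struct list_head *p;

        list_init(&head);
        insert_sorted(&head, &a);
        insert_sorted(&head, &b);
        insert_sorted(&head, &c);       /* lands between type 1 and type 5 */

        for (p = head.next; p != &head; p = p->next)
                printf("%d ", list_entry(p)->type);
        printf("\n");                   /* prints: 1 3 5 */
        return 0;
}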
kernel/events/core.c:

@@ -5962,14 +5969,15 @@ static int _perf_event_period(struct perf_event *event, u64 value)
         if (!value)
                 return -EINVAL;

-        if (event->attr.freq && value > sysctl_perf_event_sample_rate)
-                return -EINVAL;
-
-        if (perf_event_check_period(event, value))
-                return -EINVAL;
-
-        if (!event->attr.freq && (value & (1ULL << 63)))
-                return -EINVAL;
+        if (event->attr.freq) {
+                if (value > sysctl_perf_event_sample_rate)
+                        return -EINVAL;
+        } else {
+                if (perf_event_check_period(event, value))
+                        return -EINVAL;
+                if (value & (1ULL << 63))
+                        return -EINVAL;
+        }

         event_function_call(event, __perf_event_period, &value);

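For reference, the user-visible path touched by this hunk looks roughly like the sketch below: a frequency-mode event is opened and PERF_EVENT_IOC_PERIOD is then used to lower the sampling frequency. The event choice and the values are only an example and error handling is minimal; the point is that after the split above, a freq-mode value is validated only against the sysctl limit, so a low frequency is no longer rejected by the fixed-period checks.

/* Userspace sketch: lower the sampling frequency of a freq-mode event. */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t new_freq = 2;          /* 2 samples per second */
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.freq = 1;                  /* frequency mode ... */
        attr.sample_freq = 4000;        /* ... starting at 4000 Hz */
        attr.disabled = 1;
        attr.exclude_kernel = 1;
        attr.exclude_hv = 1;

        fd = syscall(SYS_perf_event_open, &attr, 0 /* this task */, -1 /* any CPU */, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        /* In freq mode, IOC_PERIOD updates the sampling frequency. */
        if (ioctl(fd, PERF_EVENT_IOC_PERIOD, &new_freq))
                perror("PERF_EVENT_IOC_PERIOD");

        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... run the workload, consume samples via the mmap ring buffer ... */
        close(fd);
        return 0;
}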
kernel/events/core.c:

@@ -8321,7 +8329,8 @@ void perf_event_exec(void)

         perf_event_enable_on_exec(ctx);
         perf_event_remove_on_exec(ctx);
-        perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true);
+        scoped_guard(rcu)
+                perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true);

         perf_unpin_context(ctx);
         put_ctx(ctx);
kernel/events/uprobes.c:

@@ -495,6 +495,11 @@ retry:
         if (ret <= 0)
                 goto put_old;

+        if (is_zero_page(old_page)) {
+                ret = -EINVAL;
+                goto put_old;
+        }
+
         if (WARN(!is_register && PageCompound(old_page),
                  "uprobe unregister should never work on compound page\n")) {
                 ret = -EINVAL;

@@ -762,10 +767,14 @@ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
         enum hprobe_state hstate;

         /*
-         * return_instance's hprobe is protected by RCU.
-         * Underlying uprobe is itself protected from reuse by SRCU.
+         * Caller should guarantee that return_instance is not going to be
+         * freed from under us. This can be achieved either through holding
+         * rcu_read_lock() or by owning return_instance in the first place.
+         *
+         * Underlying uprobe is itself protected from reuse by SRCU, so ensure
+         * SRCU lock is held properly.
          */
-        lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
+        lockdep_assert(srcu_read_lock_held(&uretprobes_srcu));

         hstate = READ_ONCE(hprobe->state);
         switch (hstate) {