Merge tag 'perf-urgent-2025-02-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf event fixes from Ingo Molnar:
 "Miscellaneous perf events fixes and a minor HW enablement change:

   - Fix missing RCU protection in perf_iterate_ctx()

   - Fix pmu_ctx_list ordering bug

   - Reject the zero page in uprobes

   - Fix a family of bugs related to low frequency sampling

   - Add Intel Arrow Lake U CPUs to the generic Arrow Lake RAPL support
     table

   - Fix a lockdep-assert false positive in uretprobes"

* tag 'perf-urgent-2025-02-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  uprobes: Remove too strict lockdep_assert() condition in hprobe_expire()
  perf/x86/rapl: Add support for Intel Arrow Lake U
  perf/x86/intel: Use better start period for frequency mode
  perf/core: Fix low freq setting via IOC_PERIOD
  perf/x86: Fix low freqency setting issue
  uprobes: Reject the shared zeropage in uprobe_write_opcode()
  perf/core: Order the PMU list to fix warning about unordered pmu_ctx_list
  perf/core: Add RCU read lock protection to perf_iterate_ctx()
commit 766331f286
Linus Torvalds, 2025-02-28 16:52:10 -08:00
5 changed files with 119 additions and 15 deletions

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == event->pmu->type)
 		event->hw.config |= x86_pmu_get_event_config(event);
 
-	if (event->attr.sample_period && x86_pmu.limit_period) {
+	if (!event->attr.freq && x86_pmu.limit_period) {
 		s64 left = event->attr.sample_period;
 		x86_pmu.limit_period(event, &left);
 		if (left > event->attr.sample_period)

--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3952,6 +3952,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
 	return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
 }
 
+static u64 intel_pmu_freq_start_period(struct perf_event *event)
+{
+	int type = event->attr.type;
+	u64 config, factor;
+	s64 start;
+
+	/*
+	 * The 127 is the lowest possible recommended SAV (sample after value)
+	 * for a 4000 freq (default freq), according to the event list JSON file.
+	 * Also, assume the workload is idle 50% time.
+	 */
+	factor = 64 * 4000;
+	if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+		goto end;
+
+	/*
+	 * The estimation of the start period in the freq mode is
+	 * based on the below assumption.
+	 *
+	 * For a cycles or an instructions event, 1GHZ of the
+	 * underlying platform, 1 IPC. The workload is idle 50% time.
+	 * The start period = 1,000,000,000 * 1 / freq / 2.
+	 *                  = 500,000,000 / freq
+	 *
+	 * Usually, the branch-related events occur less than the
+	 * instructions event. According to the Intel event list JSON
+	 * file, the SAV (sample after value) of a branch-related event
+	 * is usually 1/4 of an instruction event.
+	 * The start period of branch-related events = 125,000,000 / freq.
+	 *
+	 * The cache-related events occurs even less. The SAV is usually
+	 * 1/20 of an instruction event.
+	 * The start period of cache-related events = 25,000,000 / freq.
+	 */
+	config = event->attr.config & PERF_HW_EVENT_MASK;
+	if (type == PERF_TYPE_HARDWARE) {
+		switch (config) {
+		case PERF_COUNT_HW_CPU_CYCLES:
+		case PERF_COUNT_HW_INSTRUCTIONS:
+		case PERF_COUNT_HW_BUS_CYCLES:
+		case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+		case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+		case PERF_COUNT_HW_REF_CPU_CYCLES:
+			factor = 500000000;
+			break;
+		case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+		case PERF_COUNT_HW_BRANCH_MISSES:
+			factor = 125000000;
+			break;
+		case PERF_COUNT_HW_CACHE_REFERENCES:
+		case PERF_COUNT_HW_CACHE_MISSES:
+			factor = 25000000;
+			break;
+		default:
+			goto end;
+		}
+	}
+
+	if (type == PERF_TYPE_HW_CACHE)
+		factor = 25000000;
+
+end:
+	/*
+	 * Usually, a prime or a number with less factors (close to prime)
+	 * is chosen as an SAV, which makes it less likely that the sampling
+	 * period synchronizes with some periodic event in the workload.
+	 * Minus 1 to make it at least avoiding values near power of twos
+	 * for the default freq.
+	 */
+	start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
+	if (start > x86_pmu.max_period)
+		start = x86_pmu.max_period;
+	if (x86_pmu.limit_period)
+		x86_pmu.limit_period(event, &start);
+
+	return start;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -3963,6 +4042,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (event->attr.freq && event->attr.sample_freq) {
+		event->hw.sample_period = intel_pmu_freq_start_period(event);
+		event->hw.last_period = event->hw.sample_period;
+		local64_set(&event->hw.period_left, event->hw.sample_period);
+	}
+
 	if (event->attr.precise_ip) {
 		if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
 			return -EINVAL;
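
To get a sense of the start periods the new helper chooses, here is the arithmetic from the comment above worked through for the default 4000 Hz target frequency: cycles/instructions start at DIV_ROUND_UP(500000000, 4000) - 1 = 124999, branch events at 31249, and cache events at 6249, before the max_period/limit_period clamping. A stand-alone user-space C sketch of that calculation (illustration only, not part of the patch; the helper name is made up and no kernel headers are used):

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as the kernel helper above, without the
 * x86_pmu.max_period / limit_period() clamping. */
static uint64_t freq_start_period(uint64_t factor, uint64_t sample_freq)
{
	/* DIV_ROUND_UP_ULL(factor, sample_freq) - 1 */
	return (factor + sample_freq - 1) / sample_freq - 1;
}

int main(void)
{
	uint64_t freq = 4000;	/* default perf target frequency */

	printf("cycles/instructions: %llu\n",
	       (unsigned long long)freq_start_period(500000000ULL, freq));
	printf("branch events:       %llu\n",
	       (unsigned long long)freq_start_period(125000000ULL, freq));
	printf("cache events:        %llu\n",
	       (unsigned long long)freq_start_period(25000000ULL, freq));
	return 0;
}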

--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -879,6 +879,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
 	X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl),
 	X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl),
 	X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl),
+	X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl),
 	X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl),
 	{},
 };

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4950,7 +4950,7 @@ static struct perf_event_pmu_context *
 find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 		     struct perf_event *event)
 {
-	struct perf_event_pmu_context *new = NULL, *epc;
+	struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc;
 	void *task_ctx_data = NULL;
 
 	if (!ctx->task) {
@@ -5007,12 +5007,19 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 			atomic_inc(&epc->refcount);
 			goto found_epc;
 		}
+		/* Make sure the pmu_ctx_list is sorted by PMU type: */
+		if (!pos && epc->pmu->type > pmu->type)
+			pos = epc;
 	}
 
 	epc = new;
 	new = NULL;
 
-	list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+	if (!pos)
+		list_add_tail(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+	else
+		list_add(&epc->pmu_ctx_entry, pos->pmu_ctx_entry.prev);
+
 	epc->ctx = ctx;
 
 found_epc:
@@ -5962,14 +5969,15 @@ static int _perf_event_period(struct perf_event *event, u64 value)
 	if (!value)
 		return -EINVAL;
 
-	if (event->attr.freq && value > sysctl_perf_event_sample_rate)
-		return -EINVAL;
-
-	if (perf_event_check_period(event, value))
-		return -EINVAL;
-
-	if (!event->attr.freq && (value & (1ULL << 63)))
-		return -EINVAL;
+	if (event->attr.freq) {
+		if (value > sysctl_perf_event_sample_rate)
+			return -EINVAL;
+	} else {
+		if (perf_event_check_period(event, value))
+			return -EINVAL;
+		if (value & (1ULL << 63))
+			return -EINVAL;
+	}
 
 	event_function_call(event, __perf_event_period, &value);
 
@@ -8321,7 +8329,8 @@ void perf_event_exec(void)
 
 	perf_event_enable_on_exec(ctx);
 	perf_event_remove_on_exec(ctx);
-	perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true);
+	scoped_guard(rcu)
+		perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true);
 
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
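
The pmu_ctx_list change above keeps each context's PMU list sorted by PMU type: the lookup walk remembers the first entry whose type is larger than the new PMU's type ("pos") and inserts the new entry in front of it, falling back to list_add_tail() when no such entry exists. A minimal user-space sketch of that insertion strategy (illustration only; the types below are made-up stand-ins, not the kernel list API):

#include <stdio.h>
#include <stdlib.h>

struct epc {			/* stand-in for struct perf_event_pmu_context */
	int pmu_type;
	struct epc *next;
};

/* Insert so the list stays ordered by pmu_type: find the first entry
 * with a larger type and link the new node in front of it, otherwise
 * append at the tail -- mirroring the if (!pos) / else logic above. */
static void insert_sorted(struct epc **head, struct epc *new)
{
	struct epc **link = head;

	while (*link && (*link)->pmu_type <= new->pmu_type)
		link = &(*link)->next;

	new->next = *link;
	*link = new;
}

int main(void)
{
	struct epc *head = NULL;
	int types[] = { 8, 4, 6, 1 };

	for (int i = 0; i < 4; i++) {
		struct epc *e = malloc(sizeof(*e));

		e->pmu_type = types[i];
		insert_sorted(&head, e);
	}

	for (struct epc *e = head; e; e = e->next)
		printf("%d ", e->pmu_type);	/* prints: 1 4 6 8 */
	printf("\n");
	return 0;
}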

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -495,6 +495,11 @@ retry:
 	if (ret <= 0)
 		goto put_old;
 
+	if (is_zero_page(old_page)) {
+		ret = -EINVAL;
+		goto put_old;
+	}
+
 	if (WARN(!is_register && PageCompound(old_page),
 		 "uprobe unregister should never work on compound page\n")) {
 		ret = -EINVAL;
@@ -762,10 +767,14 @@ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
 	enum hprobe_state hstate;
 
 	/*
-	 * return_instance's hprobe is protected by RCU.
-	 * Underlying uprobe is itself protected from reuse by SRCU.
+	 * Caller should guarantee that return_instance is not going to be
+	 * freed from under us. This can be achieved either through holding
+	 * rcu_read_lock() or by owning return_instance in the first place.
+	 *
+	 * Underlying uprobe is itself protected from reuse by SRCU, so ensure
+	 * SRCU lock is held properly.
 	 */
-	lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
+	lockdep_assert(srcu_read_lock_held(&uretprobes_srcu));
 
 	hstate = READ_ONCE(hprobe->state);
 	switch (hstate) {