perf: Avoid the read if the count is already updated
JIRA: https://issues.redhat.com/browse/RHEL-47444

upstream
========
commit 8ce939a0fa194939cc1f92dbd8bc1a7806e7d40a
Author: Peter Zijlstra (Intel) <peterz@infradead.org>
Date: Tue Jan 21 07:23:02 2025 -0800

description
===========
The event may have been updated in the PMU-specific implementation,
e.g., Intel PEBS counters snapshotting. The common code should not
read and overwrite the value.

The PERF_SAMPLE_READ in the data->sample_type can be used to detect
whether the PMU-specific value is available. If yes, avoid the
pmu->read() in the common code. Add a new flag, skip_read, to track
the case.

Factor out a perf_pmu_read() to clean up the code.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250121152303.3128733-3-kan.liang@linux.intel.com

Signed-off-by: Michael Petlan <mpetlan@redhat.com>
This commit is contained in:
parent f8df95796d
commit 03491643b6
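For context, the consumer side of this change works as follows: when a PMU driver has already produced an up-to-date count for the sampled event, it marks PERF_SAMPLE_READ in data->sample_flags; perf_output_sample() then sets handle->skip_read, and perf_output_read_group() skips the perf_pmu_read() calls so the driver-provided value is not overwritten. The sketch below is illustrative only and is not part of this commit; the function name and the direct use of local64_set() are assumptions made for the example.

/*
 * Illustrative sketch only -- not part of this commit.  Shows how a
 * PMU-specific sample path is expected to cooperate with the new
 * handle->skip_read logic.  The function name and the way the count
 * is published are assumptions for the example.
 */
#include <linux/perf_event.h>

static void example_pmu_fill_sample(struct perf_event *event,
				    struct perf_sample_data *data,
				    u64 count_from_hw_snapshot)
{
	/*
	 * The driver already has a fresh count for this event (e.g. taken
	 * from a hardware snapshot record), so publish it directly ...
	 */
	local64_set(&event->count, count_from_hw_snapshot);

	/*
	 * ... and flag it.  perf_output_sample() turns this into
	 * handle->skip_read = 1, so perf_output_read_group() will not call
	 * pmu->read() again and overwrite the snapshotted value.
	 */
	data->sample_flags |= PERF_SAMPLE_READ;
}

Note also why __perf_output_begin() gains handle->flags = 0: skip_read lives in a union with flags and aux_flags, so clearing flags at the start of every output ensures the bit never carries over from stale handle contents.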
include/linux/perf_event.h
@@ -1041,7 +1041,13 @@ struct perf_output_handle {
 	struct perf_buffer		*rb;
 	unsigned long			wakeup;
 	unsigned long			size;
-	u64				aux_flags;
+	union {
+		u64			flags;		/* perf_output*() */
+		u64			aux_flags;	/* perf_aux_output*() */
+		struct {
+			u64		skip_read : 1;
+		};
+	};
 	union {
 		void			*addr;
 		unsigned long		head;

kernel/events/core.c
@@ -1195,6 +1195,12 @@ static void perf_assert_pmu_disabled(struct pmu *pmu)
 	WARN_ON_ONCE(*this_cpu_ptr(pmu->pmu_disable_count) == 0);
 }
 
+static inline void perf_pmu_read(struct perf_event *event)
+{
+	if (event->state == PERF_EVENT_STATE_ACTIVE)
+		event->pmu->read(event);
+}
+
 static void get_ctx(struct perf_event_context *ctx)
 {
 	refcount_inc(&ctx->refcount);
@@ -3473,8 +3479,7 @@ static void __perf_event_sync_stat(struct perf_event *event,
 	 * we know the event must be on the current CPU, therefore we
 	 * don't need to use it.
 	 */
-	if (event->state == PERF_EVENT_STATE_ACTIVE)
-		event->pmu->read(event);
+	perf_pmu_read(event);
 
 	perf_event_update_time(event);
 
@@ -4624,15 +4629,8 @@ static void __perf_event_read(void *info)
 
 	pmu->read(event);
 
-	for_each_sibling_event(sub, event) {
-		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
-			/*
-			 * Use sibling's PMU rather than @event's since
-			 * sibling could be on different (eg: software) PMU.
-			 */
-			sub->pmu->read(sub);
-		}
-	}
+	for_each_sibling_event(sub, event)
+		perf_pmu_read(sub);
 
 	data->ret = pmu->commit_txn(pmu);
 
@@ -7399,9 +7397,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = running;
 
-	if ((leader != event) &&
-	    (leader->state == PERF_EVENT_STATE_ACTIVE))
-		leader->pmu->read(leader);
+	if ((leader != event) && !handle->skip_read)
+		perf_pmu_read(leader);
 
 	values[n++] = perf_event_count(leader, self);
 	if (read_format & PERF_FORMAT_ID)
@@ -7414,9 +7411,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	for_each_sibling_event(sub, leader) {
 		n = 0;
 
-		if ((sub != event) &&
-		    (sub->state == PERF_EVENT_STATE_ACTIVE))
-			sub->pmu->read(sub);
+		if ((sub != event) && !handle->skip_read)
+			perf_pmu_read(sub);
 
 		values[n++] = perf_event_count(sub, self);
 		if (read_format & PERF_FORMAT_ID)
@@ -7475,6 +7471,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 {
 	u64 sample_type = data->type;
 
+	if (data->sample_flags & PERF_SAMPLE_READ)
+		handle->skip_read = 1;
+
 	perf_output_put(handle, *header);
 
 	if (sample_type & PERF_SAMPLE_IDENTIFIER)

kernel/events/ring_buffer.c
@@ -185,6 +185,7 @@ __perf_output_begin(struct perf_output_handle *handle,
 
 	handle->rb    = rb;
 	handle->event = event;
+	handle->flags = 0;
 
 	have_lost = local_read(&rb->lost);
 	if (unlikely(have_lost)) {