sched, smp: Trace smp callback causing an IPI

Bugzilla: https://bugzilla.redhat.com/2192613

Conflicts: __smp_call_single_queue_debug() needs to be modified as well;
it was removed upstream by commit 1771257cb447 ("locking/csd_lock:
Remove added data from CSD lock debugging"), so the upstream patch does
not touch it.

commit 68f4ff04dbada18dad79659c266a8e5e29e458cd
Author: Valentin Schneider <vschneid@redhat.com>
Date:   Tue Mar 7 14:35:58 2023 +0000

    sched, smp: Trace smp callback causing an IPI

    Context
    =======

    The newly-introduced ipi_send_cpumask tracepoint has a "callback" parameter
    which so far has only been fed with NULL.

    While CSD_TYPE_SYNC/ASYNC and CSD_TYPE_IRQ_WORK share a similar backing
    struct layout (meaning their callback func can be accessed without caring
    about the actual CSD type), CSD_TYPE_TTWU doesn't even have a function
    attached to its struct. This means we need to check the type of a CSD
    before eventually dereferencing its associated callback.

    This isn't as trivial as it sounds: the CSD type is stored in
    __call_single_node.u_flags, which gets cleared right before the callback is
    executed via csd_unlock(). This implies checking the CSD type before it is
    enqueued on the call_single_queue, as the target CPU's queue can be flushed
    before we get to sending an IPI.
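
    Concretely, the callback has to be resolved right before the enqueue.
    A condensed sketch (the actual kernel/smp.c hunk below also gates the
    lookup behind the tracepoint's static key and goes through a
    raw_smp_call_single_queue() helper):

        call_single_data_t *csd = container_of(node, call_single_data_t, node.llist);
        smp_call_func_t func;

        /* Resolve the callback while the CSD type flags are still set... */
        func = CSD_TYPE(csd) == CSD_TYPE_TTWU ? sched_ttwu_pending : csd->func;

        /* ...then enqueue; the IPI is only sent if the queue was empty. */
        if (llist_add(node, &per_cpu(call_single_queue, cpu)))
                send_call_function_single_ipi(cpu, func);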

    Furthermore, send_call_function_single_ipi() only has a CPU parameter, and
    would need to have an additional argument to trickle down the invoked
    function. This is somewhat silly, as the extra argument will always be
    pushed down to the function even when nothing is being traced, which is
    unnecessary overhead.

    Changes
    =======

    send_call_function_single_ipi() is only used by smp.c, and is defined in
    sched/core.c as it contains scheduler-specific ops (set_nr_if_polling() of
    a CPU's idle task).

    Split it into two parts: the scheduler bits remain in sched/core.c, and the
    actual IPI emission is moved into smp.c. This lets us define an
    __always_inline helper function that can take the related callback as
    parameter without creating useless register pressure in the non-traced path
    which only gains a (disabled) static branch.
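
    On the smp.c side the single-IPI helper ends up as below (taken from
    the kernel/smp.c hunk further down; the scheduler-specific remnant in
    sched/core.c becomes call_function_single_prep_ipi()):

        static __always_inline void
        send_call_function_single_ipi(int cpu, smp_call_func_t func)
        {
                /* false if a polling idle task will pick this up without an IPI */
                if (call_function_single_prep_ipi(cpu)) {
                        trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, func);
                        arch_send_call_function_single_ipi(cpu);
                }
        }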

    Do the same thing for the multi IPI case.
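
    i.e., also from the kernel/smp.c hunk below:

        static __always_inline void
        send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func)
        {
                trace_ipi_send_cpumask(mask, _RET_IP_, func);
                arch_send_call_function_ipi_mask(mask);
        }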

    Signed-off-by: Valentin Schneider <vschneid@redhat.com>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
    Link: https://lore.kernel.org/r/20230307143558.294354-8-vschneid@redhat.com

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
 3 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3940,16 +3940,20 @@ void sched_ttwu_pending(void *arg)
         rq_unlock_irqrestore(rq, &rf);
 }
 
-void send_call_function_single_ipi(int cpu)
+/*
+ * Prepare the scene for sending an IPI for a remote smp_call
+ *
+ * Returns true if the caller can proceed with sending the IPI.
+ * Returns false otherwise.
+ */
+bool call_function_single_prep_ipi(int cpu)
 {
-        struct rq *rq = cpu_rq(cpu);
-
-        if (!set_nr_if_polling(rq->idle)) {
-                trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, NULL);
-                arch_send_call_function_single_ipi(cpu);
-        } else {
+        if (set_nr_if_polling(cpu_rq(cpu)->idle)) {
                 trace_sched_wake_idle_without_ipi(cpu);
+                return false;
         }
+
+        return true;
 }
 
 /*

diff --git a/kernel/sched/smp.h b/kernel/sched/smp.h
--- a/kernel/sched/smp.h
+++ b/kernel/sched/smp.h
@@ -6,7 +6,7 @@
 
 extern void sched_ttwu_pending(void *arg);
 
-extern void send_call_function_single_ipi(int cpu);
+extern bool call_function_single_prep_ipi(int cpu);
 
 #ifdef CONFIG_SMP
 extern void flush_smp_call_function_queue(void);

diff --git a/kernel/smp.c b/kernel/smp.c
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -159,9 +159,18 @@ void __init call_function_init(void)
 }
 
 static __always_inline void
-send_call_function_ipi_mask(struct cpumask *mask)
+send_call_function_single_ipi(int cpu, smp_call_func_t func)
+{
+        if (call_function_single_prep_ipi(cpu)) {
+                trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, func);
+                arch_send_call_function_single_ipi(cpu);
+        }
+}
+
+static __always_inline void
+send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func)
 {
-        trace_ipi_send_cpumask(mask, _RET_IP_, NULL);
+        trace_ipi_send_cpumask(mask, _RET_IP_, func);
         arch_send_call_function_ipi_mask(mask);
 }
 
@@ -422,7 +431,8 @@ static __always_inline void csd_lock_wait(struct __call_single_data *csd)
         smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
 }
 
-static void __smp_call_single_queue_debug(int cpu, struct llist_node *node)
+static void __smp_call_single_queue_debug(int cpu, struct llist_node *node,
+                                          smp_call_func_t func)
 {
         unsigned int this_cpu = smp_processor_id();
         struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local);
@@ -433,7 +443,7 @@ static void __smp_call_single_queue_debug(int cpu, struct llist_node *node)
         if (llist_add(node, &per_cpu(call_single_queue, cpu))) {
                 cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
                 cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING);
-                send_call_function_single_ipi(cpu);
+                send_call_function_single_ipi(cpu, func);
                 cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED);
         } else {
                 cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
@@ -475,9 +485,8 @@ static __always_inline void csd_unlock(struct __call_single_data *csd)
         smp_store_release(&csd->node.u_flags, 0);
 }
 
-static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
-
-void __smp_call_single_queue(int cpu, struct llist_node *node)
+static __always_inline void
+raw_smp_call_single_queue(int cpu, struct llist_node *node, smp_call_func_t func)
 {
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
         if (static_branch_unlikely(&csdlock_debug_extended)) {
@@ -486,7 +495,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
                 type = CSD_TYPE(container_of(node, call_single_data_t,
                                              node.llist));
                 if (type == CSD_TYPE_SYNC || type == CSD_TYPE_ASYNC) {
-                        __smp_call_single_queue_debug(cpu, node);
+                        __smp_call_single_queue_debug(cpu, node, func);
                         return;
                 }
         }
@@ -505,7 +514,32 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
          * equipped to do the right thing...
          */
         if (llist_add(node, &per_cpu(call_single_queue, cpu)))
-                send_call_function_single_ipi(cpu);
+                send_call_function_single_ipi(cpu, func);
+}
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
+
+void __smp_call_single_queue(int cpu, struct llist_node *node)
+{
+        /*
+         * We have to check the type of the CSD before queueing it, because
+         * once queued it can have its flags cleared by
+         *   flush_smp_call_function_queue()
+         * even if we haven't sent the smp_call IPI yet (e.g. the stopper
+         * executes migration_cpu_stop() on the remote CPU).
+         */
+        if (trace_ipi_send_cpumask_enabled()) {
+                call_single_data_t *csd;
+                smp_call_func_t func;
+
+                csd = container_of(node, call_single_data_t, node.llist);
+                func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
+                        sched_ttwu_pending : csd->func;
+
+                raw_smp_call_single_queue(cpu, node, func);
+        } else {
+                raw_smp_call_single_queue(cpu, node, NULL);
+        }
 }
 
 /*
@@ -972,9 +1006,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
                  * provided mask.
                  */
                 if (nr_cpus == 1)
-                        send_call_function_single_ipi(last_cpu);
+                        send_call_function_single_ipi(last_cpu, func);
                 else if (likely(nr_cpus > 1))
-                        send_call_function_ipi_mask(cfd->cpumask_ipi);
+                        send_call_function_ipi_mask(cfd->cpumask_ipi, func);
 
                 cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
         }