linux-kernelorg-stable/drivers/resctrl/mpam_devices.c

2724 lines
72 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2025 Arm Ltd.
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
#include <linux/acpi.h>
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
#include <linux/atomic.h>
#include <linux/arm_mpam.h>
#include <linux/bitfield.h>
2025-11-19 12:22:48 +00:00
#include <linux/bitmap.h>
#include <linux/cacheinfo.h>
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/gfp.h>
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/srcu.h>
arm_mpam: Add helpers for managing the locking around the mon_sel registers The MSC MON_SEL register needs to be accessed from hardirq for the overflow interrupt, and when taking an IPI to access these registers on platforms where MSC are not accessible from every CPU. This makes an irqsave spinlock the obvious lock to protect these registers. On systems with SCMI or PCC mailboxes it must be able to sleep, meaning a mutex must be used. The SCMI or PCC platforms can't support an overflow interrupt, and can't access the registers from hardirq context. Clearly these two can't exist for one MSC at the same time. Add helpers for the MON_SEL locking. For now, use a irqsave spinlock and only support 'real' MMIO platforms. In the future this lock will be split in two allowing SCMI/PCC platforms to take a mutex. Because there are contexts where the SCMI/PCC platforms can't make an access, mpam_mon_sel_lock() needs to be able to fail. Do this now, so that all the error handling on these paths is present. This allows the relevant paths to fail if they are needed on a platform where this isn't possible, instead of having to make explicit checks of the interface type. Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:45 +00:00
#include <linux/spinlock.h>
#include <linux/types.h>
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
#include <linux/workqueue.h>
#include "mpam_internal.h"
/* Static key that starts out false. */
DEFINE_STATIC_KEY_FALSE(mpam_enabled); /* This moves to arch code */
/*
 * mpam_list_lock protects the SRCU lists when writing. Once the
 * mpam_enabled key is enabled these lists are read-only,
 * unless the error interrupt disables the driver.
 */
static DEFINE_MUTEX(mpam_list_lock);
/* Head of the list that every struct mpam_msc is added to when it is created. */
static LIST_HEAD(mpam_all_msc);
/* SRCU domain; mpam_free_garbage() waits on it before freeing queued objects. */
struct srcu_struct mpam_srcu;
/*
 * Number of MSCs that have been probed. Once all MSCs have been probed MPAM
 * can be enabled.
 */
static atomic_t mpam_num_msc;
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
/*
 * cpuhp state used by this driver.
 * NOTE(review): presumably mpam_cpuhp_state_lock serialises updates to
 * mpam_cpuhp_state - confirm against the cpuhp registration code, which is
 * not visible here.
 */
static int mpam_cpuhp_state;
static DEFINE_MUTEX(mpam_cpuhp_state_lock);
/*
 * The smallest common values for any CPU or MSC in the system.
 * Generating traffic outside this range will result in screaming interrupts.
 */
u16 mpam_partid_max;
u8 mpam_pmg_max;
/*
 * partid_max_init: set by mpam_register_requestor() once the first requestor
 * has seeded mpam_partid_max/mpam_pmg_max.
 * partid_max_published: once set, mpam_register_requestor() no longer lowers
 * the values and rejects requestors that would need lower ones.
 */
static bool partid_max_init, partid_max_published;
/* Held by mpam_register_requestor() while it reads or updates the values above. */
static DEFINE_SPINLOCK(partid_max_lock);
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
/*
 * mpam is enabled once all devices have been probed from CPU online callbacks,
 * scheduled via this work_struct. If access to an MSC depends on a CPU that
 * was not brought online at boot, this can happen surprisingly late.
 * The work function is mpam_enable().
 */
static DECLARE_WORK(mpam_enable_work, &mpam_enable);
/*
 * All mpam error interrupts indicate a software bug. On receipt, disable the
 * driver. The work function is mpam_disable().
 */
static DECLARE_WORK(mpam_broken_work, &mpam_disable);
/* When mpam is disabled, the printed reason to aid debugging */
static char *mpam_disable_reason;
/*
* An MSC is a physical container for controls and monitors, each identified by
* their RIS index. These share a base-address, interrupts and some MMIO
* registers. A vMSC is a virtual container for RIS in an MSC that control or
* monitor the same thing. Members of a vMSC are all RIS in the same MSC, but
* not all RIS in an MSC share a vMSC.
*
* Components are a group of vMSC that control or monitor the same thing but
* are from different MSC, so have different base-address, interrupts etc.
* Classes are the set of components of the same type.
*
* The features of a vMSC are the union of the features of the RIS it contains.
* The features of a Class and Component are the common subset of the vMSC
* they contain.
*
* e.g. The system cache may have bandwidth controls on multiple interfaces,
* for regulating traffic from devices independently of traffic from CPUs.
* If these are two RIS in one MSC, they will be treated as controlling
* different things, and will not share a vMSC/component/class.
*
* e.g. The L2 may have one MSC and two RIS, one for cache-controls another
* for bandwidth. These two RIS are members of the same vMSC.
*
* e.g. The set of RIS that make up the L2 are grouped as a component. These
* are sometimes termed slices. They should be configured the same, as if there
* were only one.
*
* e.g. The SoC probably has more than one L2, each attached to a distinct set
* of CPUs. All the L2 components are grouped as a class.
*
* When creating an MSC, struct mpam_msc is added to the mpam_all_msc list,
* then linked via struct mpam_ris to a vmsc, component and class.
* The same MSC may exist under different class->component->vmsc paths, but the
* RIS index will be unique.
*/
/*
 * List of all classes. Entries are added by mpam_class_alloc() and removed by
 * mpam_class_destroy().
 */
LIST_HEAD(mpam_classes);
/*
 * List of all objects that can be free()d after synchronize_srcu().
 * Filled by add_to_garbage() and drained by mpam_free_garbage().
 */
static LLIST_HEAD(mpam_garbage);
/*
 * Initialise the lock-less list node embedded in @garbage, so the owning
 * object can later be queued with add_to_garbage().
 */
static inline void init_garbage(struct mpam_garbage *garbage)
{
	struct llist_node *node = &garbage->llist;

	init_llist_node(node);
}
/*
 * Queue object @x on the mpam_garbage list for deferred freeing.
 * @x must embed a 'garbage' member; its to_free pointer is set to the object
 * itself, which is what mpam_free_garbage() eventually releases.
 * @x is evaluated exactly once.
 */
#define add_to_garbage(x)						\
do {									\
	__typeof__(x) _obj = (x);					\
									\
	_obj->garbage.to_free = _obj;					\
	llist_add(&_obj->garbage.llist, &mpam_garbage);			\
} while (0)
static void mpam_free_garbage(void)
{
struct mpam_garbage *iter, *tmp;
struct llist_node *to_free = llist_del_all(&mpam_garbage);
if (!to_free)
return;
synchronize_srcu(&mpam_srcu);
llist_for_each_entry_safe(iter, tmp, to_free, llist) {
if (iter->pdev)
devm_kfree(&iter->pdev->dev, iter->to_free);
else
kfree(iter->to_free);
}
}
/*
* Once mpam is enabled, new requestors cannot further reduce the available
* partid. Assert that the size is fixed, and new requestors will be turned
* away.
*/
static void mpam_assert_partid_sizes_fixed(void)
{
WARN_ON_ONCE(!partid_max_published);
}
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
/*
 * Read the 32-bit MSC register at byte offset @reg from the MSC's mapped
 * register page.
 *
 * Warns once if the access would extend beyond the mapped region (the same
 * check __mpam_write_reg() makes), or if the calling CPU is not in the MSC's
 * accessibility mask. The read is still attempted after a warning.
 *
 * Returns the value read with readl_relaxed().
 */
static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg)
{
	WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz);
	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));

	return readl_relaxed(msc->mapped_hwpage + reg);
}
/*
 * Read MSC register @reg; lockdep checks that the caller holds the MSC's
 * part_sel_lock.
 */
static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg)
{
	u32 val;

	lockdep_assert_held_once(&msc->part_sel_lock);
	val = __mpam_read_reg(msc, reg);

	return val;
}

/* Convenience wrapper: @reg is given without its MPAMF_ prefix. */
#define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg)
/*
 * Write the 32-bit value @val to the MSC register at byte offset @reg.
 *
 * Warns once if the access would extend beyond the mapped region, or if the
 * calling CPU is not in the MSC's accessibility mask. The write is still
 * issued, with writel_relaxed(), after a warning.
 */
static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val)
{
	size_t access_end = reg + sizeof(u32);

	WARN_ON_ONCE(access_end > msc->mapped_hwpage_sz);
	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));

	writel_relaxed(val, msc->mapped_hwpage + reg);
}
/*
 * Write @val to MSC register @reg; lockdep checks that the caller holds the
 * MSC's part_sel_lock.
 */
static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg,
					   u32 val)
{
	lockdep_assert_held_once(&msc->part_sel_lock);

	__mpam_write_reg(msc, reg, val);
}

/* Convenience wrapper: @reg is given without its MPAMCFG_ prefix. */
#define mpam_write_partsel_reg(msc, reg, val) _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val)
/*
 * Read MSC register @reg after calling mpam_mon_sel_lock_held() on the MSC.
 * NOTE(review): that helper is defined outside this view; presumably it
 * asserts the mon_sel lock is held - confirm.
 */
static inline u32 _mpam_read_monsel_reg(struct mpam_msc *msc, u16 reg)
{
	u32 val;

	mpam_mon_sel_lock_held(msc);
	val = __mpam_read_reg(msc, reg);

	return val;
}

/* Convenience wrapper: @reg is given without its MSMON_ prefix. */
#define mpam_read_monsel_reg(msc, reg) _mpam_read_monsel_reg(msc, MSMON_##reg)
/*
 * Write @val to MSC register @reg after calling mpam_mon_sel_lock_held() on
 * the MSC.
 * NOTE(review): that helper is defined outside this view; presumably it
 * asserts the mon_sel lock is held - confirm.
 */
static inline void _mpam_write_monsel_reg(struct mpam_msc *msc, u16 reg,
					  u32 val)
{
	mpam_mon_sel_lock_held(msc);

	__mpam_write_reg(msc, reg, val);
}

/* Convenience wrapper: @reg is given without its MSMON_ prefix. */
#define mpam_write_monsel_reg(msc, reg, val) _mpam_write_monsel_reg(msc, MSMON_##reg, val)
static u64 mpam_msc_read_idr(struct mpam_msc *msc)
{
u64 idr_high = 0, idr_low;
lockdep_assert_held(&msc->part_sel_lock);
idr_low = mpam_read_partsel_reg(msc, IDR);
if (FIELD_GET(MPAMF_IDR_EXT, idr_low))
idr_high = mpam_read_partsel_reg(msc, IDR + 4);
return (idr_high << 32) | idr_low;
}
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
/*
 * Clear the MSC's MPAMF_ESR register, if its low word reads as non-zero.
 */
static void mpam_msc_clear_esr(struct mpam_msc *msc)
{
	/* The low word reads as zero: nothing to clear */
	if (__mpam_read_reg(msc, MPAMF_ESR) == 0)
		return;

	/*
	 * The two halves of MPAMF_ESR can't be cleared atomically. Write the
	 * top half first: while error bits are still pending in the lower
	 * half, hardware will not update either half of the register.
	 */
	if (msc->has_extd_esr)
		__mpam_write_reg(msc, MPAMF_ESR + 4, 0);

	__mpam_write_reg(msc, MPAMF_ESR, 0);
}
static u64 mpam_msc_read_esr(struct mpam_msc *msc)
{
u64 esr_high = 0, esr_low;
esr_low = __mpam_read_reg(msc, MPAMF_ESR);
if (msc->has_extd_esr)
esr_high = __mpam_read_reg(msc, MPAMF_ESR + 4);
return (esr_high << 32) | esr_low;
}
/*
 * Write the pre-built selector value @partsel to the MSC's MPAMCFG_PART_SEL
 * register. The caller must hold the MSC's part_sel_lock.
 */
static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc)
{
	lockdep_assert_held(&msc->part_sel_lock);

	_mpam_write_partsel_reg(msc, MPAMCFG_PART_SEL, partsel);
}
/*
 * Build a MPAMCFG_PART_SEL value from @ris_idx and @partid and write it to
 * the MSC.
 */
static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc)
{
	u32 sel = FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid);

	sel |= FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx);

	__mpam_part_sel_raw(sel, msc);
}
/*
 * As __mpam_part_sel(), but the selector value also has
 * MPAMCFG_PART_SEL_INTERNAL set and carries @intpartid in the PARTID_SEL
 * field.
 */
static void __mpam_intpart_sel(u8 ris_idx, u16 intpartid, struct mpam_msc *msc)
{
	u32 sel = MPAMCFG_PART_SEL_INTERNAL;

	sel |= FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, intpartid);
	sel |= FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx);

	__mpam_part_sel_raw(sel, msc);
}
/*
 * Register a requestor that supports partid values up to @partid_max and pmg
 * values up to @pmg_max.
 *
 * The first caller seeds mpam_partid_max and mpam_pmg_max. Until the values
 * are published, later callers can only lower them to the common minimum.
 * After publication the values are fixed.
 *
 * Returns 0 on success, or -EBUSY if the values are already published and
 * this requestor supports less than either of them.
 */
int mpam_register_requestor(u16 partid_max, u8 pmg_max)
{
	guard(spinlock)(&partid_max_lock);

	if (!partid_max_init) {
		partid_max_init = true;
		mpam_partid_max = partid_max;
		mpam_pmg_max = pmg_max;
		return 0;
	}

	if (!partid_max_published) {
		mpam_partid_max = min(mpam_partid_max, partid_max);
		mpam_pmg_max = min(mpam_pmg_max, pmg_max);
		return 0;
	}

	/* New requestors can't lower the values */
	if (partid_max >= mpam_partid_max && pmg_max >= mpam_pmg_max)
		return 0;

	return -EBUSY;
}
EXPORT_SYMBOL(mpam_register_requestor);
/*
 * Allocate a class for (@level_idx, @type) and add it to mpam_classes.
 * The caller must hold mpam_list_lock.
 *
 * Returns the new class, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct mpam_class *
mpam_class_alloc(u8 level_idx, enum mpam_class_types type)
{
	struct mpam_class *new_class;

	lockdep_assert_held(&mpam_list_lock);

	new_class = kzalloc(sizeof(*new_class), GFP_KERNEL);
	if (new_class == NULL)
		return ERR_PTR(-ENOMEM);

	init_garbage(&new_class->garbage);

	/* Affinity is updated when ris are added */
	new_class->level = level_idx;
	new_class->type = type;

	INIT_LIST_HEAD_RCU(&new_class->components);
	INIT_LIST_HEAD_RCU(&new_class->classes_list);

	ida_init(&new_class->ida_csu_mon);
	ida_init(&new_class->ida_mbwu_mon);

	/* Make the fully initialised class visible on the list last */
	list_add_rcu(&new_class->classes_list, &mpam_classes);

	return new_class;
}
/*
 * Unlink @class from mpam_classes and queue it for deferred freeing.
 * The caller must hold mpam_list_lock.
 */
static void mpam_class_destroy(struct mpam_class *class)
{
	lockdep_assert_held(&mpam_list_lock);

	list_del_rcu(&class->classes_list);

	/* The memory is released later, by mpam_free_garbage() */
	add_to_garbage(class);
}
/*
 * Look up the class matching (@level_idx, @type), allocating it if no such
 * class exists yet. The caller must hold mpam_list_lock.
 *
 * Returns the class, or the ERR_PTR() from mpam_class_alloc().
 */
static struct mpam_class *
mpam_class_find(u8 level_idx, enum mpam_class_types type)
{
	struct mpam_class *iter;

	lockdep_assert_held(&mpam_list_lock);

	list_for_each_entry(iter, &mpam_classes, classes_list) {
		if (iter->type != type || iter->level != level_idx)
			continue;

		return iter;
	}

	return mpam_class_alloc(level_idx, type);
}
static struct mpam_component *
mpam_component_alloc(struct mpam_class *class, int id)
{
struct mpam_component *comp;
lockdep_assert_held(&mpam_list_lock);
comp = kzalloc(sizeof(*comp), GFP_KERNEL);
if (!comp)
return ERR_PTR(-ENOMEM);
init_garbage(&comp->garbage);
comp->comp_id = id;
INIT_LIST_HEAD_RCU(&comp->vmsc);
/* Affinity is updated when RIS are added */
INIT_LIST_HEAD_RCU(&comp->class_list);
comp->class = class;
list_add_rcu(&comp->class_list, &class->components);
return comp;
}
static void __destroy_component_cfg(struct mpam_component *comp);

/*
 * Tear down @comp: destroy its configuration, unlink it from its class and
 * queue it for deferred freeing. If that leaves the class without components,
 * the class is destroyed too. The caller must hold mpam_list_lock.
 */
static void mpam_component_destroy(struct mpam_component *comp)
{
	struct mpam_class *parent = comp->class;

	lockdep_assert_held(&mpam_list_lock);

	__destroy_component_cfg(comp);

	list_del_rcu(&comp->class_list);
	add_to_garbage(comp);

	if (!list_empty(&parent->components))
		return;

	mpam_class_destroy(parent);
}
/*
 * Look up the component of @class with id @id, allocating it if it does not
 * exist yet. The caller must hold mpam_list_lock.
 *
 * Returns the component, or the ERR_PTR() from mpam_component_alloc().
 */
static struct mpam_component *
mpam_component_find(struct mpam_class *class, int id)
{
	struct mpam_component *iter;

	lockdep_assert_held(&mpam_list_lock);

	list_for_each_entry(iter, &class->components, class_list) {
		if (iter->comp_id != id)
			continue;

		return iter;
	}

	return mpam_component_alloc(class, id);
}
static struct mpam_vmsc *
mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc)
{
struct mpam_vmsc *vmsc;
lockdep_assert_held(&mpam_list_lock);
vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL);
if (!vmsc)
return ERR_PTR(-ENOMEM);
init_garbage(&vmsc->garbage);
INIT_LIST_HEAD_RCU(&vmsc->ris);
INIT_LIST_HEAD_RCU(&vmsc->comp_list);
vmsc->comp = comp;
vmsc->msc = msc;
list_add_rcu(&vmsc->comp_list, &comp->vmsc);
return vmsc;
}
/*
 * Unlink @vmsc from its component and queue it for deferred freeing. If that
 * leaves the component without any vMSC, the component is destroyed too.
 * The caller must hold mpam_list_lock.
 */
static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc)
{
	struct mpam_component *parent = vmsc->comp;

	lockdep_assert_held(&mpam_list_lock);

	list_del_rcu(&vmsc->comp_list);
	add_to_garbage(vmsc);

	if (!list_empty(&parent->vmsc))
		return;

	mpam_component_destroy(parent);
}
/*
 * Look up the vMSC of @comp that belongs to an MSC with the same id as @msc,
 * allocating it if it does not exist yet. The caller must hold
 * mpam_list_lock.
 *
 * Returns the vMSC, or the ERR_PTR() from mpam_vmsc_alloc().
 */
static struct mpam_vmsc *
mpam_vmsc_find(struct mpam_component *comp, struct mpam_msc *msc)
{
	struct mpam_vmsc *iter;

	lockdep_assert_held(&mpam_list_lock);

	list_for_each_entry(iter, &comp->vmsc, comp_list) {
		if (iter->msc->id != msc->id)
			continue;

		return iter;
	}

	return mpam_vmsc_alloc(comp, msc);
}
/*
 * cacheinfo is only populated for CPUs that are online. Walk the ACPI tables
 * instead, so that offline CPUs are included in @affinity too.
 *
 * @cache_level is accepted but not used by this implementation.
 *
 * Returns the result of acpi_pptt_get_cpumask_from_cache_id().
 */
int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
				   cpumask_t *affinity)
{
	int ret;

	ret = acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity);

	return ret;
}
/*
 * cpumask_of_node() only covers online CPUs, so it can't say whether a class
 * is represented on all possible CPUs. Instead, set in @affinity every
 * possible CPU whose node is @node_id. Bits already set in @affinity are
 * left alone.
 */
static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (node_id != cpu_to_node(cpu))
			continue;

		cpumask_set_cpu(cpu, affinity);
	}
}
/*
 * Work out which CPUs a RIS of @msc is associated with, based on the class
 * @type and the component's id, and store the result in @affinity.
 *
 * For cache and memory classes the result is restricted to the CPUs in the
 * MSC's accessibility mask. For MPAM_CLASS_UNKNOWN, @affinity is left
 * untouched.
 *
 * Returns 0 on success, or the error from the cache-id lookup.
 */
static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity,
				 enum mpam_class_types type,
				 struct mpam_class *class,
				 struct mpam_component *comp)
{
	int ret;

	switch (type) {
	case MPAM_CLASS_UNKNOWN:
		return 0;
	case MPAM_CLASS_MEMORY:
		/* affinity may be empty for CPU-less memory nodes */
		get_cpumask_from_node_id(comp->comp_id, affinity);
		break;
	case MPAM_CLASS_CACHE:
		ret = mpam_get_cpumask_from_cache_id(comp->comp_id,
						     class->level, affinity);
		if (ret) {
			dev_warn_once(&msc->pdev->dev,
				      "Failed to determine CPU affinity\n");
			return ret;
		}

		if (cpumask_empty(affinity))
			dev_warn_once(&msc->pdev->dev, "no CPUs associated with cache node\n");
		break;
	}

	cpumask_and(affinity, affinity, &msc->accessibility);

	return 0;
}
/*
 * Create the RIS with index @ris_idx for @msc and link it into the
 * class -> component -> vMSC hierarchy identified by (@class_id, @type) and
 * @component_id. Missing parents are created on demand. The caller must hold
 * mpam_list_lock.
 *
 * On failure, parents that were left empty are destroyed (queued as garbage),
 * the RIS allocation is released and the @ris_idx bit in msc->ris_idxs is
 * cleared again, so the MSC's bookkeeping matches the RIS that actually exist.
 *
 * Returns 0 on success, -EINVAL if @ris_idx is out of range, -EBUSY if the
 * index is already in use, -ENOMEM on allocation failure, or the error from
 * creating a parent or determining the affinity.
 */
static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx,
				  enum mpam_class_types type, u8 class_id,
				  int component_id)
{
	int err;
	struct mpam_vmsc *vmsc;
	struct mpam_msc_ris *ris;
	struct mpam_class *class;
	struct mpam_component *comp;
	struct platform_device *pdev = msc->pdev;

	lockdep_assert_held(&mpam_list_lock);

	/*
	 * NOTE(review): if MPAM_MSC_MAX_NUM_RIS is a count rather than the
	 * largest valid index, this should be '>=' - confirm against its
	 * definition.
	 */
	if (ris_idx > MPAM_MSC_MAX_NUM_RIS)
		return -EINVAL;

	if (test_and_set_bit(ris_idx, &msc->ris_idxs))
		return -EBUSY;

	ris = devm_kzalloc(&pdev->dev, sizeof(*ris), GFP_KERNEL);
	if (!ris) {
		err = -ENOMEM;
		goto err_clear_idx;
	}
	init_garbage(&ris->garbage);
	ris->garbage.pdev = pdev;

	class = mpam_class_find(class_id, type);
	if (IS_ERR(class)) {
		err = PTR_ERR(class);
		goto err_free_ris;
	}

	comp = mpam_component_find(class, component_id);
	if (IS_ERR(comp)) {
		if (list_empty(&class->components))
			mpam_class_destroy(class);
		err = PTR_ERR(comp);
		goto err_free_ris;
	}

	vmsc = mpam_vmsc_find(comp, msc);
	if (IS_ERR(vmsc)) {
		if (list_empty(&comp->vmsc))
			mpam_component_destroy(comp);
		err = PTR_ERR(vmsc);
		goto err_free_ris;
	}

	err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp);
	if (err) {
		if (list_empty(&vmsc->ris))
			mpam_vmsc_destroy(vmsc);
		goto err_free_ris;
	}

	ris->ris_idx = ris_idx;
	INIT_LIST_HEAD_RCU(&ris->msc_list);
	INIT_LIST_HEAD_RCU(&ris->vmsc_list);
	ris->vmsc = vmsc;

	cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity);
	cpumask_or(&class->affinity, &class->affinity, &ris->affinity);

	/* Make the fully initialised RIS visible on the lists last */
	list_add_rcu(&ris->vmsc_list, &vmsc->ris);
	list_add_rcu(&ris->msc_list, &msc->ris);

	return 0;

err_free_ris:
	/* ris was never added to a list, so no reader can hold a reference */
	devm_kfree(&pdev->dev, ris);
err_clear_idx:
	clear_bit(ris_idx, &msc->ris_idxs);
	return err;
}
/*
 * Remove @ris from its MSC and vMSC, and queue it for deferred freeing. If
 * that leaves the vMSC without any RIS, the vMSC is destroyed too. The caller
 * must hold mpam_list_lock.
 */
static void mpam_ris_destroy(struct mpam_msc_ris *ris)
{
	struct mpam_vmsc *parent = ris->vmsc;
	struct mpam_component *comp = parent->comp;
	struct mpam_class *class = comp->class;
	struct mpam_msc *msc = parent->msc;

	lockdep_assert_held(&mpam_list_lock);

	/*
	 * Affinities are assumed not to overlap. If they do, removing this
	 * RIS's CPUs makes the class unusable immediately.
	 */
	cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity);
	cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity);

	/* The index may be used by a new RIS from now on */
	clear_bit(ris->ris_idx, &msc->ris_idxs);

	list_del_rcu(&ris->msc_list);
	list_del_rcu(&ris->vmsc_list);
	add_to_garbage(ris);

	if (!list_empty(&parent->ris))
		return;

	mpam_vmsc_destroy(parent);
}
/*
 * Locked wrapper around mpam_ris_create_locked(): takes mpam_list_lock for the
 * duration of the creation, and on failure runs mpam_free_garbage() after the
 * lock has been dropped.
 *
 * Returns 0 on success or the error returned by mpam_ris_create_locked().
 */
int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
		    enum mpam_class_types type, u8 class_id, int component_id)
{
	int ret;

	mutex_lock(&mpam_list_lock);
	ret = mpam_ris_create_locked(msc, ris_idx, type, class_id,
				     component_id);
	mutex_unlock(&mpam_list_lock);

	if (!ret)
		return 0;

	mpam_free_garbage();

	return ret;
}
/*
 * Look up the RIS with index @ris_idx on @msc. If the index is not yet marked
 * in msc->ris_idxs, a RIS of class type MPAM_CLASS_UNKNOWN is created first.
 *
 * Returns the RIS, an ERR_PTR() carrying the creation error, or
 * ERR_PTR(-ENOENT) if the index is marked but no RIS is on the MSC's list.
 *
 * Caller must hold mpam_list_lock.
 */
static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc,
						   u8 ris_idx)
{
	struct mpam_msc_ris *cursor;

	lockdep_assert_held(&mpam_list_lock);

	if (!test_bit(ris_idx, &msc->ris_idxs)) {
		int ret = mpam_ris_create_locked(msc, ris_idx,
						 MPAM_CLASS_UNKNOWN, 0, 0);

		if (ret)
			return ERR_PTR(ret);
	}

	list_for_each_entry(cursor, &msc->ris, msc_list) {
		if (cursor->ris_idx != ris_idx)
			continue;

		return cursor;
	}

	return ERR_PTR(-ENOENT);
}
/*
 * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour
 * of NRDY, software can use this bit for any purpose" - so hardware might not
 * implement this - but it isn't RES0.
 *
 * Try and see what values stick in this bit. If we can write either value,
 * it's probably not implemented by hardware.
 */
/*
 * Probe whether the NRDY bit of the monitor register @mon_reg is managed by
 * hardware for @ris.
 *
 * Monitor 0 of the RIS is selected, then NRDY is written as 1 and as 0 and
 * read back each time. If both values stick, software fully controls the bit
 * and it is treated as not implemented by hardware.
 *
 * Returns true if NRDY could not be set or could not be cleared (hardware
 * managed), false otherwise. Also returns false, after a one-time warning,
 * if the monitor-select lock cannot be taken.
 */
static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg)
{
	u32 now;
	u32 mon_sel;
	bool can_set, can_clear;
	struct mpam_msc *msc = ris->vmsc->msc;

	if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc)))
		return false;

	/*
	 * Select monitor 0 of this RIS through the selector register, as the
	 * other monitor accessors in this file do. Writing the selector value
	 * to @mon_reg itself would leave whatever monitor/RIS was previously
	 * selected as the target of the NRDY test below.
	 */
	mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) |
		  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
	mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);

	/* Can software set NRDY? */
	_mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY);
	now = _mpam_read_monsel_reg(msc, mon_reg);
	can_set = now & MSMON___NRDY;

	/* Can software clear NRDY? */
	_mpam_write_monsel_reg(msc, mon_reg, 0);
	now = _mpam_read_monsel_reg(msc, mon_reg);
	can_clear = !(now & MSMON___NRDY);

	mpam_mon_sel_unlock(msc);

	return (!can_set || !can_clear);
}
/*
 * Wrapper around _mpam_ris_hw_probe_hw_nrdy() that takes the short monitor
 * register name (e.g. CSU, MBWU) and pastes the MSMON_ prefix onto it.
 */
#define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg) \
_mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg)
/*
 * Decode the feature ID registers of @ris and record what the hardware
 * advertises in ris->props. ris->idr must already hold the RIS's IDR value;
 * each HAS_* bit in it gates the read of the corresponding feature register.
 *
 * Caller must hold msc->probe_lock and msc->part_sel_lock (both asserted).
 * The part_sel_lock holder is expected to have selected this RIS already -
 * this function does not select it itself.
 */
static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
{
int err;
struct mpam_msc *msc = ris->vmsc->msc;
struct device *dev = &msc->pdev->dev;
struct mpam_props *props = &ris->props;
struct mpam_class *class = ris->vmsc->comp->class;
lockdep_assert_held(&msc->probe_lock);
lockdep_assert_held(&msc->part_sel_lock);
/* Cache Capacity Partitioning */
if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) {
u32 ccap_features = mpam_read_partsel_reg(msc, CCAP_IDR);
/* The cmax_* features are only recorded when the CMAX width is non-zero */
props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ccap_features);
if (props->cmax_wd &&
FIELD_GET(MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM, ccap_features))
mpam_set_feature(mpam_feat_cmax_softlim, props);
/* Note the inverted sense: the ID bit reports that CMAX is absent */
if (props->cmax_wd &&
!FIELD_GET(MPAMF_CCAP_IDR_NO_CMAX, ccap_features))
mpam_set_feature(mpam_feat_cmax_cmax, props);
if (props->cmax_wd &&
FIELD_GET(MPAMF_CCAP_IDR_HAS_CMIN, ccap_features))
mpam_set_feature(mpam_feat_cmax_cmin, props);
props->cassoc_wd = FIELD_GET(MPAMF_CCAP_IDR_CASSOC_WD, ccap_features);
if (props->cassoc_wd &&
FIELD_GET(MPAMF_CCAP_IDR_HAS_CASSOC, ccap_features))
mpam_set_feature(mpam_feat_cmax_cassoc, props);
}
/* Cache Portion partitioning */
if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) {
u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR);
props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features);
if (props->cpbm_wd)
mpam_set_feature(mpam_feat_cpor_part, props);
}
/* Memory bandwidth partitioning */
if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) {
u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR);
/* portion bitmap resolution */
props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features);
if (props->mbw_pbm_bits &&
FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features))
mpam_set_feature(mpam_feat_mbw_part, props);
/* min/max/prop controls are only recorded when the BWA width is non-zero */
props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features);
if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features))
mpam_set_feature(mpam_feat_mbw_max, props);
if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features))
mpam_set_feature(mpam_feat_mbw_min, props);
if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features))
mpam_set_feature(mpam_feat_mbw_prop, props);
}
/* Priority partitioning */
if (FIELD_GET(MPAMF_IDR_HAS_PRI_PART, ris->idr)) {
u32 pri_features = mpam_read_partsel_reg(msc, PRI_IDR);
/* The *_0_low features record the hardware's 0_IS_LOW ID bits */
props->intpri_wd = FIELD_GET(MPAMF_PRI_IDR_INTPRI_WD, pri_features);
if (props->intpri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_INTPRI, pri_features)) {
mpam_set_feature(mpam_feat_intpri_part, props);
if (FIELD_GET(MPAMF_PRI_IDR_INTPRI_0_IS_LOW, pri_features))
mpam_set_feature(mpam_feat_intpri_part_0_low, props);
}
props->dspri_wd = FIELD_GET(MPAMF_PRI_IDR_DSPRI_WD, pri_features);
if (props->dspri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_DSPRI, pri_features)) {
mpam_set_feature(mpam_feat_dspri_part, props);
if (FIELD_GET(MPAMF_PRI_IDR_DSPRI_0_IS_LOW, pri_features))
mpam_set_feature(mpam_feat_dspri_part_0_low, props);
}
}
/* Performance Monitoring */
if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) {
u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR);
/*
 * If the firmware "arm,not-ready-us" property is missing, the
 * CSU counters can't be used. Should we wait forever?
 */
err = device_property_read_u32(&msc->pdev->dev,
"arm,not-ready-us",
&msc->nrdy_usec);
if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) {
u32 csumonidr;
csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR);
props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr);
if (props->num_csu_mon) {
bool hw_managed;
mpam_set_feature(mpam_feat_msmon_csu, props);
if (FIELD_GET(MPAMF_CSUMON_IDR_HAS_XCL, csumonidr))
mpam_set_feature(mpam_feat_msmon_csu_xcl, props);
/* Is NRDY hardware managed? */
hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU);
if (hw_managed)
mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props);
}
/*
 * Accept the missing firmware property if NRDY appears
 * un-implemented: only complain when the property read failed
 * and NRDY was detected as hardware managed.
 */
if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props))
dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware.");
}
if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) {
bool has_long, hw_managed;
u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR);
props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr);
if (props->num_mbwu_mon) {
mpam_set_feature(mpam_feat_msmon_mbwu, props);
if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr))
mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props);
/* Exactly one of the 31/44/63 counter-size features is recorded */
has_long = FIELD_GET(MPAMF_MBWUMON_IDR_HAS_LONG, mbwumon_idr);
if (has_long) {
if (FIELD_GET(MPAMF_MBWUMON_IDR_LWD, mbwumon_idr))
mpam_set_feature(mpam_feat_msmon_mbwu_63counter, props);
else
mpam_set_feature(mpam_feat_msmon_mbwu_44counter, props);
} else {
mpam_set_feature(mpam_feat_msmon_mbwu_31counter, props);
}
/* Is NRDY hardware managed? */
hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU);
if (hw_managed)
mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props);
/*
 * Don't warn about any missing firmware property for
 * MBWU NRDY - it doesn't make any sense!
 */
}
}
}
/*
 * RIS with PARTID narrowing don't have enough storage for one
 * configuration per PARTID. If these are in a class we could use,
 * reduce the supported partid_max to match the number of intpartid.
 * If the class is unknown, just ignore it.
 */
if (FIELD_GET(MPAMF_IDR_HAS_PARTID_NRW, ris->idr) &&
class->type != MPAM_CLASS_UNKNOWN) {
u32 nrwidr = mpam_read_partsel_reg(msc, PARTID_NRW_IDR);
u16 partid_max = FIELD_GET(MPAMF_PARTID_NRW_IDR_INTPARTID_MAX, nrwidr);
mpam_set_feature(mpam_feat_partid_nrw, props);
msc->partid_max = min(msc->partid_max, partid_max);
}
}
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
static int mpam_msc_hw_probe(struct mpam_msc *msc)
{
u64 idr;
u16 partid_max;
u8 ris_idx, pmg_max;
struct mpam_msc_ris *ris;
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
struct device *dev = &msc->pdev->dev;
lockdep_assert_held(&msc->probe_lock);
idr = __mpam_read_reg(msc, MPAMF_AIDR);
if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) {
dev_err_once(dev, "MSC does not match MPAM architecture v1.x\n");
return -EIO;
}
/* Grab an IDR value to find out how many RIS there are */
mutex_lock(&msc->part_sel_lock);
idr = mpam_msc_read_idr(msc);
mutex_unlock(&msc->part_sel_lock);
msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr);
/* Use these values so partid/pmg always starts with a valid value */
msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) {
mutex_lock(&msc->part_sel_lock);
__mpam_part_sel(ris_idx, 0, msc);
idr = mpam_msc_read_idr(msc);
mutex_unlock(&msc->part_sel_lock);
partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
msc->partid_max = min(msc->partid_max, partid_max);
msc->pmg_max = min(msc->pmg_max, pmg_max);
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXTD_ESR, idr);
mutex_lock(&mpam_list_lock);
ris = mpam_get_or_create_ris(msc, ris_idx);
mutex_unlock(&mpam_list_lock);
if (IS_ERR(ris))
return PTR_ERR(ris);
ris->idr = idr;
mutex_lock(&msc->part_sel_lock);
__mpam_part_sel(ris_idx, 0, msc);
mpam_ris_hw_probe(ris);
mutex_unlock(&msc->part_sel_lock);
}
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
/* Clear any stale errors */
mpam_msc_clear_esr(msc);
spin_lock(&partid_max_lock);
mpam_partid_max = min(mpam_partid_max, msc->partid_max);
mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max);
spin_unlock(&partid_max_lock);
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
msc->probed = true;
return 0;
}
/*
 * Argument block passed to __ris_msmon_read(), which takes it as a void *
 * so it can be run via smp_call_function_any().
 */
struct mon_read {
/* RIS to read; _msmon_read() sets this for each RIS it visits */
struct mpam_msc_ris *ris;
/* Monitor configuration: which monitor to use and what to match on */
struct mon_cfg *ctx;
/* Monitor type being read (CSU or one of the MBWU counter sizes) */
enum mpam_device_features type;
/* Accumulator: each successful read adds its value to *val */
u64 *val;
/* Set by __ris_msmon_read() on failure (-EIO, -EINVAL or -EBUSY) */
int err;
};
static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
{
return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
}
/*
 * Read the 64-bit MSMON_MBWU_L counter using 32-bit accesses.
 *
 * The high word is sampled before and after the low word; the value is only
 * accepted when both high-word samples agree. Up to three attempts are made.
 * If no stable value is obtained a warning is printed and MSMON___L_NRDY is
 * returned.
 *
 * Called with the monitor-select lock held, on a CPU that can access @msc.
 */
static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
{
	int attempts;
	u32 low;
	u64 high_before, high_after;

	mpam_mon_sel_lock_held(msc);

	WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));

	high_after = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
	for (attempts = 3; attempts > 0; attempts--) {
		high_before = high_after;
		low = __mpam_read_reg(msc, MSMON_MBWU_L);
		high_after = __mpam_read_reg(msc, MSMON_MBWU_L + 4);

		/* High word unchanged across the low read: value is coherent */
		if (high_before == high_after)
			return (high_before << 32) | low;
	}

	pr_warn("Failed to read a stable value\n");
	return MSMON___L_NRDY;
}
/*
 * Zero the 64-bit MSMON_MBWU_L counter using two 32-bit writes: the low word
 * at MSMON_MBWU_L first, then the high word at MSMON_MBWU_L + 4.
 *
 * Called with the monitor-select lock held.
 */
static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
{
mpam_mon_sel_lock_held(msc);
/* Warn if the 64-bit register would extend past the mapped region */
WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
/* Warn if running on a CPU outside the MSC's accessibility mask */
WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
__mpam_write_reg(msc, MSMON_MBWU_L, 0);
__mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
}
/*
 * Build the control (@ctl_val) and filter (@flt_val) register values that the
 * monitor described by @m should be programmed with. The enable bit is not
 * included in @ctl_val.
 *
 * An unexpected monitor type is reported with pr_warn(); the PARTID/PMG
 * matching bits are still returned in that case.
 */
static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
				   u32 *flt_val)
{
	struct mon_cfg *cfg = m->ctx;
	u32 ctl, flt;

	/*
	 * For CSU counters its implementation-defined what happens when not
	 * filtering by partid.
	 */
	ctl = MSMON_CFG_x_CTL_MATCH_PARTID;
	flt = FIELD_PREP(MSMON_CFG_x_FLT_PARTID, cfg->partid);

	if (cfg->match_pmg) {
		ctl |= MSMON_CFG_x_CTL_MATCH_PMG;
		flt |= FIELD_PREP(MSMON_CFG_x_FLT_PMG, cfg->pmg);
	}

	switch (m->type) {
	case mpam_feat_msmon_csu:
		ctl |= MSMON_CFG_CSU_CTL_TYPE_CSU;

		if (mpam_has_feature(mpam_feat_msmon_csu_xcl, &m->ris->props))
			flt |= FIELD_PREP(MSMON_CFG_CSU_FLT_XCL,
					  cfg->csu_exclude_clean);
		break;
	case mpam_feat_msmon_mbwu_31counter:
	case mpam_feat_msmon_mbwu_44counter:
	case mpam_feat_msmon_mbwu_63counter:
		ctl |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;

		if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
			flt |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, cfg->opts);
		break;
	default:
		pr_warn("Unexpected monitor type %d\n", m->type);
	}

	*ctl_val = ctl;
	*flt_val = flt;
}
/*
 * Read back the control and filter registers of the currently selected
 * monitor, choosing the CSU or MBWU register pair from m->type.
 *
 * For an unexpected monitor type a warning is printed and neither output is
 * written.
 */
static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
				    u32 *flt_val)
{
	struct mpam_msc *msc = m->ris->vmsc->msc;
	bool is_mbwu = m->type == mpam_feat_msmon_mbwu_31counter ||
		       m->type == mpam_feat_msmon_mbwu_44counter ||
		       m->type == mpam_feat_msmon_mbwu_63counter;

	if (m->type == mpam_feat_msmon_csu) {
		*ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
		*flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
	} else if (is_mbwu) {
		*ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
		*flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
	} else {
		pr_warn("Unexpected monitor type %d\n", m->type);
	}
}
/*
 * Strip the overflow status bits from a control register value read back from
 * hardware, so it can be compared against a software-generated value. The
 * long-counter overflow bit is only stripped when the type field says MBWU.
 */
static inline void clean_msmon_ctl_val(u32 *cur_ctl)
{
	u32 ctl = *cur_ctl;

	ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;

	if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
		ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;

	*cur_ctl = ctl;
}
/*
 * Program the currently selected monitor with @flt_val and @ctl_val, zero its
 * counter and leave it enabled.
 *
 * CSU: filter, control (disabled), zero counter, control (enabled).
 * MBWU: filter, control (disabled), control (enabled), then zero the counter
 * using the access that matches the counter size.
 *
 * An unexpected monitor type only produces a warning.
 */
static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
				     u32 flt_val)
{
	struct mpam_msc *msc = m->ris->vmsc->msc;
	u32 ctl_enabled = ctl_val | MSMON_CFG_x_CTL_EN;

	switch (m->type) {
	case mpam_feat_msmon_csu:
		mpam_write_monsel_reg(msc, CFG_CSU_FLT, flt_val);
		mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val);
		mpam_write_monsel_reg(msc, CSU, 0);
		mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_enabled);
		break;
	case mpam_feat_msmon_mbwu_31counter:
	case mpam_feat_msmon_mbwu_44counter:
	case mpam_feat_msmon_mbwu_63counter:
		mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_enabled);

		/* Counting monitors require NRDY to be reset by software */
		if (m->type != mpam_feat_msmon_mbwu_31counter)
			mpam_msc_zero_mbwu_l(msc);
		else
			mpam_write_monsel_reg(msc, MBWU, 0);
		break;
	default:
		pr_warn("Unexpected monitor type %d\n", m->type);
	}
}
/*
 * Amount to add to a bandwidth counter's running total for each overflow:
 * 2^(number of value bits) for the given MBWU counter size, or 0 for any
 * other monitor type.
 */
static u64 mpam_msmon_overflow_val(enum mpam_device_features type)
{
	/* TODO: implement scaling counters */
	if (type == mpam_feat_msmon_mbwu_63counter)
		return BIT_ULL(hweight_long(MSMON___LWD_VALUE));

	if (type == mpam_feat_msmon_mbwu_44counter)
		return BIT_ULL(hweight_long(MSMON___L_VALUE));

	if (type == mpam_feat_msmon_mbwu_31counter)
		return BIT_ULL(hweight_long(MSMON___VALUE));

	return 0;
}
static void __ris_msmon_read(void *arg)
{
u64 now;
bool nrdy = false;
bool config_mismatch;
bool overflow;
struct mon_read *m = arg;
struct mon_cfg *ctx = m->ctx;
bool reset_on_next_read = false;
struct mpam_msc_ris *ris = m->ris;
struct msmon_mbwu_state *mbwu_state;
struct mpam_props *rprops = &ris->props;
struct mpam_msc *msc = m->ris->vmsc->msc;
u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt;
if (!mpam_mon_sel_lock(msc)) {
m->err = -EIO;
return;
}
mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, ctx->mon) |
FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
switch (m->type) {
case mpam_feat_msmon_mbwu_31counter:
case mpam_feat_msmon_mbwu_44counter:
case mpam_feat_msmon_mbwu_63counter:
mbwu_state = &ris->mbwu_state[ctx->mon];
if (mbwu_state) {
reset_on_next_read = mbwu_state->reset_on_next_read;
mbwu_state->reset_on_next_read = false;
}
break;
default:
break;
}
/*
* Read the existing configuration to avoid re-writing the same values.
* This saves waiting for 'nrdy' on subsequent reads.
*/
read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt);
if (mpam_feat_msmon_mbwu_31counter == m->type)
overflow = cur_ctl & MSMON_CFG_x_CTL_OFLOW_STATUS;
else if (mpam_feat_msmon_mbwu_44counter == m->type ||
mpam_feat_msmon_mbwu_63counter == m->type)
overflow = cur_ctl & MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
clean_msmon_ctl_val(&cur_ctl);
gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val);
config_mismatch = cur_flt != flt_val ||
cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN);
if (config_mismatch || reset_on_next_read) {
write_msmon_ctl_flt_vals(m, ctl_val, flt_val);
overflow = false;
} else if (overflow) {
mpam_write_monsel_reg(msc, CFG_MBWU_CTL,
cur_ctl &
~(MSMON_CFG_x_CTL_OFLOW_STATUS |
MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L));
}
switch (m->type) {
case mpam_feat_msmon_csu:
now = mpam_read_monsel_reg(msc, CSU);
if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops))
nrdy = now & MSMON___NRDY;
now = FIELD_GET(MSMON___VALUE, now);
break;
case mpam_feat_msmon_mbwu_31counter:
case mpam_feat_msmon_mbwu_44counter:
case mpam_feat_msmon_mbwu_63counter:
if (m->type != mpam_feat_msmon_mbwu_31counter) {
now = mpam_msc_read_mbwu_l(msc);
if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
nrdy = now & MSMON___L_NRDY;
if (m->type == mpam_feat_msmon_mbwu_63counter)
now = FIELD_GET(MSMON___LWD_VALUE, now);
else
now = FIELD_GET(MSMON___L_VALUE, now);
} else {
now = mpam_read_monsel_reg(msc, MBWU);
if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
nrdy = now & MSMON___NRDY;
now = FIELD_GET(MSMON___VALUE, now);
}
if (nrdy)
break;
mbwu_state = &ris->mbwu_state[ctx->mon];
if (overflow)
mbwu_state->correction += mpam_msmon_overflow_val(m->type);
/*
* Include bandwidth consumed before the last hardware reset and
* a counter size increment for each overflow.
*/
now += mbwu_state->correction;
break;
default:
m->err = -EINVAL;
}
mpam_mon_sel_unlock(msc);
if (nrdy) {
m->err = -EBUSY;
return;
}
*m->val += now;
}
/*
 * Run __ris_msmon_read() for every RIS of every vMSC in @comp, each time on a
 * CPU from the owning MSC's accessibility mask, waiting for completion.
 *
 * Returns 0 if every read succeeded, otherwise the most recent error seen
 * (either from the cross-call itself or from arg->err).
 */
static int _msmon_read(struct mpam_component *comp, struct mon_read *arg)
{
	int last_err = 0;
	struct mpam_vmsc *vmsc;

	guard(srcu)(&mpam_srcu);
	list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		struct mpam_msc_ris *ris;

		list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list,
					 srcu_read_lock_held(&mpam_srcu)) {
			int ret;

			arg->ris = ris;
			ret = smp_call_function_any(&vmsc->msc->accessibility,
						    __ris_msmon_read, arg,
						    true);
			if (!ret)
				ret = arg->err;

			/*
			 * Remember one error for the caller, but carry on
			 * reading so every counter gets reprogrammed. On
			 * platforms with NRDY this lets us wait once.
			 */
			if (ret)
				last_err = ret;
		}
	}

	return last_err;
}
static enum mpam_device_features mpam_msmon_choose_counter(struct mpam_class *class)
{
struct mpam_props *cprops = &class->props;
if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops))
return mpam_feat_msmon_mbwu_63counter;
if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, cprops))
return mpam_feat_msmon_mbwu_44counter;
return mpam_feat_msmon_mbwu_31counter;
}
/*
 * Read a monitor of @type across every RIS in @comp and return the sum in
 * @val.
 *
 * The generic mpam_feat_msmon_mbwu type is translated to the counter size the
 * class supports. If the first attempt reports -EBUSY (not ready), the read is
 * retried once, after sleeping for the class's nrdy_usec if that is non-zero.
 *
 * Returns 0 on success, -EIO if MPAM is not enabled, -EOPNOTSUPP if the class
 * lacks @type, or the error from the (last) read attempt. May sleep.
 */
int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
		    enum mpam_device_features type, u64 *val)
{
	int ret;
	struct mon_read request;
	u64 remaining = 0;
	struct mpam_class *class = comp->class;

	might_sleep();

	if (!mpam_is_enabled())
		return -EIO;

	if (!mpam_has_feature(type, &class->props))
		return -EOPNOTSUPP;

	if (type == mpam_feat_msmon_mbwu)
		type = mpam_msmon_choose_counter(class);

	request = (struct mon_read) {
		.ctx	= ctx,
		.type	= type,
		.val	= val,
	};
	*val = 0;

	ret = _msmon_read(comp, &request);
	if (ret != -EBUSY)
		return ret;

	/* Not ready: give the hardware its advertised settle time, if any */
	if (class->nrdy_usec)
		remaining = usecs_to_jiffies(class->nrdy_usec);

	while (remaining)
		remaining = schedule_timeout_uninterruptible(remaining);

	/* Start again from a clean request and a zeroed accumulator */
	request = (struct mon_read) {
		.ctx	= ctx,
		.type	= type,
		.val	= val,
	};
	*val = 0;

	return _msmon_read(comp, &request);
}
void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx)
{
struct mpam_msc *msc;
struct mpam_vmsc *vmsc;
struct mpam_msc_ris *ris;
if (!mpam_is_enabled())
return;
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list,
srcu_read_lock_held(&mpam_srcu)) {
if (!mpam_has_feature(mpam_feat_msmon_mbwu, &vmsc->props))
continue;
msc = vmsc->msc;
list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list,
srcu_read_lock_held(&mpam_srcu)) {
if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props))
continue;
if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc)))
continue;
ris->mbwu_state[ctx->mon].correction = 0;
ris->mbwu_state[ctx->mon].reset_on_next_read = true;
mpam_mon_sel_unlock(msc);
}
}
}
2025-11-19 12:22:48 +00:00
/*
 * Set all @wd bits of the bitmap register that starts at offset @reg, using
 * consecutive 32-bit writes. Nothing is written when @wd is 0.
 *
 * Caller must hold msc->part_sel_lock.
 */
static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd)
{
	u32 full_words, top_bit;
	u32 pattern = ~0;

	lockdep_assert_held(&msc->part_sel_lock);

	if (!wd)
		return;

	/*
	 * Every word except the last is written as all-ones; the last word
	 * may have fewer bits and is handled separately below.
	 */
	full_words = DIV_ROUND_UP(wd, 32) - 1;
	while (full_words--) {
		__mpam_write_reg(msc, reg, pattern);
		reg += sizeof(pattern);
	}

	/*
	 * The final (maybe partial) word. When wd is a multiple of 32 the
	 * top bit is 31, giving a full 32-bit word.
	 */
	top_bit = (wd - 1) % 32;
	pattern = GENMASK(top_bit, 0);
	__mpam_write_reg(msc, reg, pattern);
}
/*
 * Write the configuration @cfg for @partid to the controls of @ris.
 *
 * Only controls that the RIS implements are written. The portion bitmaps and
 * MBW_MAX take their value from @cfg (or their reset value when the matching
 * cfg->reset_* flag is set); MBW_MIN, MBW_PROP, CMAX, CMIN, CASSOC and the
 * priority controls are always written with fixed default values.
 *
 * Call while holding an SRCU reference. The original comment here said
 * "Called via IPI".
 * NOTE(review): this takes msc->part_sel_lock, a mutex, so the caller has to
 * be able to sleep - confirm how this is invoked.
 */
static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid,
				      struct mpam_config *cfg)
{
	u32 pri_val = 0;
	u16 cmax = MPAMCFG_CMAX_CMAX;
	struct mpam_msc *msc = ris->vmsc->msc;
	struct mpam_props *rprops = &ris->props;
	/*
	 * NOTE(review): GENMASK(wd, 0) is wd + 1 bits wide. If *_wd is a bit
	 * count, GENMASK(wd - 1, 0) may have been intended - confirm.
	 */
	u16 dspri = GENMASK(rprops->dspri_wd, 0);
	u16 intpri = GENMASK(rprops->intpri_wd, 0);

	mutex_lock(&msc->part_sel_lock);
	__mpam_part_sel(ris->ris_idx, partid, msc);

	if (mpam_has_feature(mpam_feat_partid_nrw, rprops)) {
		/* Update the intpartid mapping */
		mpam_write_partsel_reg(msc, INTPARTID,
				       MPAMCFG_INTPARTID_INTERNAL | partid);

		/*
		 * Then switch to the 'internal' partid to update the
		 * configuration.
		 */
		__mpam_intpart_sel(ris->ris_idx, partid, msc);
	}

	if (mpam_has_feature(mpam_feat_cpor_part, rprops) &&
	    mpam_has_feature(mpam_feat_cpor_part, cfg)) {
		if (cfg->reset_cpbm)
			mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd);
		else
			mpam_write_partsel_reg(msc, CPBM, cfg->cpbm);
	}

	if (mpam_has_feature(mpam_feat_mbw_part, rprops) &&
	    mpam_has_feature(mpam_feat_mbw_part, cfg)) {
		if (cfg->reset_mbw_pbm)
			mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits);
		else
			mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm);
	}

	if (mpam_has_feature(mpam_feat_mbw_min, rprops) &&
	    mpam_has_feature(mpam_feat_mbw_min, cfg))
		mpam_write_partsel_reg(msc, MBW_MIN, 0);

	if (mpam_has_feature(mpam_feat_mbw_max, rprops) &&
	    mpam_has_feature(mpam_feat_mbw_max, cfg)) {
		if (cfg->reset_mbw_max)
			mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX);
		else
			mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max);
	}

	if (mpam_has_feature(mpam_feat_mbw_prop, rprops) &&
	    mpam_has_feature(mpam_feat_mbw_prop, cfg))
		mpam_write_partsel_reg(msc, MBW_PROP, 0);

	if (mpam_has_feature(mpam_feat_cmax_cmax, rprops))
		mpam_write_partsel_reg(msc, CMAX, cmax);

	if (mpam_has_feature(mpam_feat_cmax_cmin, rprops))
		mpam_write_partsel_reg(msc, CMIN, 0);

	if (mpam_has_feature(mpam_feat_cmax_cassoc, rprops))
		mpam_write_partsel_reg(msc, CASSOC, MPAMCFG_CASSOC_CASSOC);

	if (mpam_has_feature(mpam_feat_intpri_part, rprops) ||
	    mpam_has_feature(mpam_feat_dspri_part, rprops)) {
		/* aces high? */
		if (!mpam_has_feature(mpam_feat_intpri_part_0_low, rprops))
			intpri = 0;
		if (!mpam_has_feature(mpam_feat_dspri_part_0_low, rprops))
			dspri = 0;

		if (mpam_has_feature(mpam_feat_intpri_part, rprops))
			pri_val |= FIELD_PREP(MPAMCFG_PRI_INTPRI, intpri);
		if (mpam_has_feature(mpam_feat_dspri_part, rprops))
			pri_val |= FIELD_PREP(MPAMCFG_PRI_DSPRI, dspri);

		mpam_write_partsel_reg(msc, PRI, pri_val);
	}

	mutex_unlock(&msc->part_sel_lock);
}
/* Call with msc cfg_lock held */
static int mpam_restore_mbwu_state(void *_ris)
{
int i;
struct mon_read mwbu_arg;
struct mpam_msc_ris *ris = _ris;
struct mpam_class *class = ris->vmsc->comp->class;
for (i = 0; i < ris->props.num_mbwu_mon; i++) {
if (ris->mbwu_state[i].enabled) {
mwbu_arg.ris = ris;
mwbu_arg.ctx = &ris->mbwu_state[i].cfg;
mwbu_arg.type = mpam_msmon_choose_counter(class);
__ris_msmon_read(&mwbu_arg);
}
}
return 0;
}
/*
 * Snapshot and stop every bandwidth-usage monitor of a RIS.
 *
 * @arg: the struct mpam_msc_ris to save, passed as void pointer so this can
 *       be used as a mpam_touch_msc() callback.
 *
 * For each monitor the filter and control registers are read back into the
 * monitor's saved mon_cfg, the control register is cleared, and the counter
 * value is folded into mbwu_state->correction before the counter is zeroed.
 *
 * Returns 0, or -EIO (with a one-time warning) if the mon_sel lock could not
 * be taken.
 *
 * Call with MSC cfg_lock held
 */
static int mpam_save_mbwu_state(void *arg)
{
	int i;
	u64 val;
	struct mon_cfg *cfg;
	u32 cur_flt, cur_ctl, mon_sel;
	struct mpam_msc_ris *ris = arg;
	struct msmon_mbwu_state *mbwu_state;
	struct mpam_msc *msc = ris->vmsc->msc;

	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
		mbwu_state = &ris->mbwu_state[i];
		cfg = &mbwu_state->cfg;

		/* The lock is taken and dropped once per monitor */
		if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc)))
			return -EIO;

		/* Select monitor i of this RIS for the accesses that follow */
		mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) |
			  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
		mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);

		/* Capture the current configuration before clearing the control */
		cur_flt = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
		cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0);

		/* Read the counter through whichever width this RIS has, then zero it */
		if (mpam_ris_has_mbwu_long_counter(ris)) {
			val = mpam_msc_read_mbwu_l(msc);
			mpam_msc_zero_mbwu_l(msc);
		} else {
			val = mpam_read_monsel_reg(msc, MBWU);
			mpam_write_monsel_reg(msc, MBWU, 0);
		}

		/* Record what was programmed, decoded from the registers read above */
		cfg->mon = i;
		cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt);
		cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl);
		cfg->partid = FIELD_GET(MSMON_CFG_x_FLT_PARTID, cur_flt);
		/* Accumulate the count that was just cleared from the hardware */
		mbwu_state->correction += val;
		mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl);
		mpam_mon_sel_unlock(msc);
	}

	return 0;
}
static void mpam_init_reset_cfg(struct mpam_config *reset_cfg)
{
*reset_cfg = (struct mpam_config) {
.reset_cpbm = true,
.reset_mbw_pbm = true,
.reset_mbw_max = true,
};
bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST);
}
/*
* Called via smp_call_on_cpu() to prevent migration, while still being
* pre-emptible. Caller must hold mpam_srcu.
*/
static int mpam_reset_ris(void *arg)
2025-11-19 12:22:48 +00:00
{
u16 partid, partid_max;
struct mpam_config reset_cfg;
struct mpam_msc_ris *ris = arg;
2025-11-19 12:22:48 +00:00
if (ris->in_reset_state)
return 0;
2025-11-19 12:22:48 +00:00
mpam_init_reset_cfg(&reset_cfg);
2025-11-19 12:22:48 +00:00
spin_lock(&partid_max_lock);
partid_max = mpam_partid_max;
spin_unlock(&partid_max_lock);
for (partid = 0; partid <= partid_max; partid++)
mpam_reprogram_ris_partid(ris, partid, &reset_cfg);
return 0;
}
/*
 * Pick the CPU to use for accessing this MSC: the current CPU when it is in
 * the MSC's accessibility mask, otherwise the first CPU that is both in that
 * mask and online. The caller may be preempted or migrated after the current
 * CPU was sampled; that only results in more work.
 */
static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc)
{
	const int this_cpu = raw_smp_processor_id();

	if (!cpumask_test_cpu(this_cpu, &msc->accessibility))
		return cpumask_first_and(&msc->accessibility, cpu_online_mask);

	return this_cpu;
}
/*
 * Run fn(arg) through smp_call_on_cpu() on the CPU chosen by
 * mpam_get_msc_preferred_cpu() and return its result.
 *
 * Asserts that interrupts are enabled and the cpus lock is held, and warns
 * once if mpam_srcu is not read-held.
 */
static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg)
{
	int target_cpu;

	lockdep_assert_irqs_enabled();
	lockdep_assert_cpus_held();
	WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu)));

	target_cpu = mpam_get_msc_preferred_cpu(msc);

	return smp_call_on_cpu(target_cpu, fn, arg, true);
}
/* Argument bundle handed to __write_config() by way of mpam_touch_msc(). */
struct mpam_write_config_arg {
	struct mpam_msc_ris *ris;	/* RIS to be programmed */
	struct mpam_component *comp;	/* component whose cfg[] array is the source */
	u16 partid;			/* partid to program; also the index into comp->cfg[] */
};
/*
 * mpam_touch_msc() callback: program one partid of one RIS from the owning
 * component's stored configuration. @arg is a struct mpam_write_config_arg.
 * Always returns 0.
 */
static int __write_config(void *arg)
{
	struct mpam_write_config_arg *req = arg;
	struct mpam_config *cfg = &req->comp->cfg[req->partid];

	mpam_reprogram_ris_partid(req->ris, req->partid, cfg);

	return 0;
}
static void mpam_reprogram_msc(struct mpam_msc *msc)
2025-11-19 12:22:48 +00:00
{
u16 partid;
bool reset;
struct mpam_config *cfg;
2025-11-19 12:22:48 +00:00
struct mpam_msc_ris *ris;
struct mpam_write_config_arg arg;
/*
* No lock for mpam_partid_max as partid_max_published has been
* set by mpam_enabled(), so the values can no longer change.
*/
mpam_assert_partid_sizes_fixed();
2025-11-19 12:22:48 +00:00
mutex_lock(&msc->cfg_lock);
list_for_each_entry_srcu(ris, &msc->ris, msc_list,
srcu_read_lock_held(&mpam_srcu)) {
if (!mpam_is_enabled() && !ris->in_reset_state) {
mpam_touch_msc(msc, &mpam_reset_ris, ris);
ris->in_reset_state = true;
continue;
}
2025-11-19 12:22:48 +00:00
arg.comp = ris->vmsc->comp;
arg.ris = ris;
reset = true;
for (partid = 0; partid <= mpam_partid_max; partid++) {
cfg = &ris->vmsc->comp->cfg[partid];
if (!bitmap_empty(cfg->features, MPAM_FEATURE_LAST))
reset = false;
arg.partid = partid;
mpam_touch_msc(msc, __write_config, &arg);
}
ris->in_reset_state = reset;
if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props))
mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris);
2025-11-19 12:22:48 +00:00
}
mutex_unlock(&msc->cfg_lock);
2025-11-19 12:22:48 +00:00
}
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
/*
 * Enable the per-cpu interrupt whose number @_irq points at (as an int),
 * using IRQ_TYPE_NONE as the type argument.
 */
static void _enable_percpu_irq(void *_irq)
{
	enable_percpu_irq(*(int *)_irq, IRQ_TYPE_NONE);
}
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
static int mpam_cpu_online(unsigned int cpu)
{
2025-11-19 12:22:48 +00:00
struct mpam_msc *msc;
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
srcu_read_lock_held(&mpam_srcu)) {
if (!cpumask_test_cpu(cpu, &msc->accessibility))
continue;
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
if (msc->reenable_error_ppi)
_enable_percpu_irq(&msc->reenable_error_ppi);
2025-11-19 12:22:48 +00:00
if (atomic_fetch_inc(&msc->online_refs) == 0)
mpam_reprogram_msc(msc);
2025-11-19 12:22:48 +00:00
}
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
return 0;
}
/* Before mpam is enabled, try to probe new MSC */
static int mpam_discovery_cpu_online(unsigned int cpu)
{
int err = 0;
struct mpam_msc *msc;
bool new_device_probed = false;
if (mpam_is_enabled())
return 0;
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
srcu_read_lock_held(&mpam_srcu)) {
if (!cpumask_test_cpu(cpu, &msc->accessibility))
continue;
mutex_lock(&msc->probe_lock);
if (!msc->probed)
err = mpam_msc_hw_probe(msc);
mutex_unlock(&msc->probe_lock);
if (err)
break;
new_device_probed = true;
}
if (new_device_probed && !err)
schedule_work(&mpam_enable_work);
if (err) {
mpam_disable_reason = "error during probing";
schedule_work(&mpam_broken_work);
}
return err;
}
static int mpam_cpu_offline(unsigned int cpu)
{
2025-11-19 12:22:48 +00:00
struct mpam_msc *msc;
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
srcu_read_lock_held(&mpam_srcu)) {
if (!cpumask_test_cpu(cpu, &msc->accessibility))
continue;
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
if (msc->reenable_error_ppi)
disable_percpu_irq(msc->reenable_error_ppi);
if (atomic_dec_and_test(&msc->online_refs)) {
struct mpam_msc_ris *ris;
mutex_lock(&msc->cfg_lock);
list_for_each_entry_srcu(ris, &msc->ris, msc_list,
srcu_read_lock_held(&mpam_srcu)) {
mpam_touch_msc(msc, &mpam_reset_ris, ris);
/*
* The reset state for non-zero partid may be
* lost while the CPUs are offline.
*/
ris->in_reset_state = false;
if (mpam_is_enabled())
mpam_touch_msc(msc, &mpam_save_mbwu_state, ris);
}
mutex_unlock(&msc->cfg_lock);
}
2025-11-19 12:22:48 +00:00
}
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
return 0;
}
/*
 * Install @online / @offline as the driver's cpuhp callbacks under @name,
 * replacing whatever was registered before.
 *
 * Any existing state recorded in mpam_cpuhp_state is removed first, then a
 * new one is set up on CPUHP_AP_ONLINE_DYN. On failure an error is logged and
 * mpam_cpuhp_state is left at 0. The whole sequence runs under
 * mpam_cpuhp_state_lock.
 */
static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online),
					  int (*offline)(unsigned int offline),
					  char *name)
{
	mutex_lock(&mpam_cpuhp_state_lock);

	if (mpam_cpuhp_state) {
		cpuhp_remove_state(mpam_cpuhp_state);
		mpam_cpuhp_state = 0;
	}

	mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, name, online,
					     offline);
	if (mpam_cpuhp_state <= 0) {
		/* Terminate the message so it is emitted as a complete line */
		pr_err("Failed to register cpuhp callbacks\n");
		mpam_cpuhp_state = 0;
	}

	mutex_unlock(&mpam_cpuhp_state_lock);
}
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
static int __setup_ppi(struct mpam_msc *msc)
{
int cpu;
msc->error_dev_id = alloc_percpu(struct mpam_msc *);
if (!msc->error_dev_id)
return -ENOMEM;
for_each_cpu(cpu, &msc->accessibility)
*per_cpu_ptr(msc->error_dev_id, cpu) = msc;
return 0;
}
static int mpam_msc_setup_error_irq(struct mpam_msc *msc)
{
int irq;
irq = platform_get_irq_byname_optional(msc->pdev, "error");
if (irq <= 0)
return 0;
/* Allocate and initialise the percpu device pointer for PPI */
if (irq_is_percpu(irq))
return __setup_ppi(msc);
/* sanity check: shared interrupts can be routed anywhere? */
if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) {
pr_err_once("msc:%u is a private resource with a shared error interrupt",
msc->id);
return -EINVAL;
}
return 0;
}
/*
 * An MSC can control traffic from a set of CPUs, but may only be accessible
 * from a (hopefully wider) set of CPUs. The common reason for this is power
 * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the
 * corresponding cache may also be powered off. By making accesses from
 * one of those CPUs, we ensure we don't access a cache that's powered off.
 *
 * The mask comes from the container named by the "cpu_affinity" device
 * property; when that property cannot be read, every possible CPU is used.
 */
static void update_msc_accessibility(struct mpam_msc *msc)
{
	u32 affinity_id;

	if (device_property_read_u32(&msc->pdev->dev, "cpu_affinity",
				     &affinity_id)) {
		cpumask_copy(&msc->accessibility, cpu_possible_mask);
		return;
	}

	acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility);
}
/*
 * A struct mpam_msc_ris can be reached in two ways: through
 * class->component->vmsc->ris, or through the msc. Destroying the msc
 * therefore means unlinking and cleaning up the other side as well.
 *
 * Every RIS is destroyed, the msc is taken off the list of all MSC, the
 * platform device's drvdata is cleared and the msc is queued as garbage.
 * Must be called with mpam_list_lock held.
 */
static void mpam_msc_destroy(struct mpam_msc *msc)
{
	struct mpam_msc_ris *cur, *next;
	struct platform_device *owner = msc->pdev;

	lockdep_assert_held(&mpam_list_lock);

	list_for_each_entry_safe(cur, next, &msc->ris, msc_list)
		mpam_ris_destroy(cur);

	list_del_rcu(&msc->all_msc_list);
	platform_set_drvdata(owner, NULL);

	add_to_garbage(msc);
}
/*
 * Platform driver remove callback: destroy the msc stored in @pdev's drvdata
 * under mpam_list_lock, then free the accumulated garbage after the lock has
 * been dropped.
 */
static void mpam_msc_drv_remove(struct platform_device *pdev)
{
	struct mpam_msc *doomed;

	doomed = platform_get_drvdata(pdev);

	mutex_lock(&mpam_list_lock);
	mpam_msc_destroy(doomed);
	mutex_unlock(&mpam_list_lock);

	mpam_free_garbage();
}
static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev)
{
int err;
u32 tmp;
struct mpam_msc *msc;
struct resource *msc_res;
struct device *dev = &pdev->dev;
lockdep_assert_held(&mpam_list_lock);
msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL);
if (!msc)
return ERR_PTR(-ENOMEM);
init_garbage(&msc->garbage);
msc->garbage.pdev = pdev;
err = devm_mutex_init(dev, &msc->probe_lock);
if (err)
return ERR_PTR(err);
err = devm_mutex_init(dev, &msc->part_sel_lock);
if (err)
return ERR_PTR(err);
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
err = devm_mutex_init(dev, &msc->error_irq_lock);
if (err)
return ERR_PTR(err);
err = devm_mutex_init(dev, &msc->cfg_lock);
if (err)
return ERR_PTR(err);
arm_mpam: Add helpers for managing the locking around the mon_sel registers The MSC MON_SEL register needs to be accessed from hardirq for the overflow interrupt, and when taking an IPI to access these registers on platforms where MSC are not accessible from every CPU. This makes an irqsave spinlock the obvious lock to protect these registers. On systems with SCMI or PCC mailboxes it must be able to sleep, meaning a mutex must be used. The SCMI or PCC platforms can't support an overflow interrupt, and can't access the registers from hardirq context. Clearly these two can't exist for one MSC at the same time. Add helpers for the MON_SEL locking. For now, use a irqsave spinlock and only support 'real' MMIO platforms. In the future this lock will be split in two allowing SCMI/PCC platforms to take a mutex. Because there are contexts where the SCMI/PCC platforms can't make an access, mpam_mon_sel_lock() needs to be able to fail. Do this now, so that all the error handling on these paths is present. This allows the relevant paths to fail if they are needed on a platform where this isn't possible, instead of having to make explicit checks of the interface type. Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:45 +00:00
mpam_mon_sel_lock_init(msc);
msc->id = pdev->id;
msc->pdev = pdev;
INIT_LIST_HEAD_RCU(&msc->all_msc_list);
INIT_LIST_HEAD_RCU(&msc->ris);
update_msc_accessibility(msc);
if (cpumask_empty(&msc->accessibility)) {
dev_err_once(dev, "MSC is not accessible from any CPU!");
return ERR_PTR(-EINVAL);
}
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
err = mpam_msc_setup_error_irq(msc);
if (err)
return ERR_PTR(err);
if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp))
msc->iface = MPAM_IFACE_MMIO;
else
msc->iface = MPAM_IFACE_PCC;
if (msc->iface == MPAM_IFACE_MMIO) {
void __iomem *io;
io = devm_platform_get_and_ioremap_resource(pdev, 0,
&msc_res);
if (IS_ERR(io)) {
dev_err_once(dev, "Failed to map MSC base address\n");
return ERR_CAST(io);
}
msc->mapped_hwpage_sz = msc_res->end - msc_res->start;
msc->mapped_hwpage = io;
} else {
return ERR_PTR(-EINVAL);
}
list_add_rcu(&msc->all_msc_list, &mpam_all_msc);
platform_set_drvdata(pdev, msc);
return msc;
}
/*
 * Compared against the running mpam_num_msc count in mpam_msc_drv_probe();
 * the discovery cpuhp callbacks are registered when the two match.
 * NOTE(review): presumably the number of MSC described by firmware - it is
 * not assigned anywhere in this part of the file, confirm where it is set.
 */
static int fw_num_msc;
static int mpam_msc_drv_probe(struct platform_device *pdev)
{
int err;
struct mpam_msc *msc = NULL;
void *plat_data = pdev->dev.platform_data;
mutex_lock(&mpam_list_lock);
msc = do_mpam_msc_drv_probe(pdev);
mutex_unlock(&mpam_list_lock);
if (IS_ERR(msc))
return PTR_ERR(msc);
/* Create RIS entries described by firmware */
err = acpi_mpam_parse_resources(msc, plat_data);
if (err) {
mpam_msc_drv_remove(pdev);
return err;
}
if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc)
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
mpam_register_cpuhp_callbacks(mpam_discovery_cpu_online, NULL,
"mpam:drv_probe");
return 0;
}
/*
 * Platform driver description for an MSC: binds the probe and remove
 * callbacks defined in this file under the name "mpam_msc".
 */
static struct platform_driver mpam_msc_driver = {
	.probe	= mpam_msc_drv_probe,
	.remove	= mpam_msc_drv_remove,
	.driver	= {
		.name = "mpam_msc",
	},
};
/*
 * The BWA_WD field is valid if any one of the mbw_min, mbw_max or mbw_prop
 * features is present in @props.
 */
static bool mpam_has_bwa_wd_feature(struct mpam_props *props)
{
	return mpam_has_feature(mpam_feat_mbw_min, props) ||
	       mpam_has_feature(mpam_feat_mbw_max, props) ||
	       mpam_has_feature(mpam_feat_mbw_prop, props);
}
/*
 * The CMAX_WD field is valid if either the cmax_cmax or the cmax_cmin
 * feature is present in @props.
 */
static bool mpam_has_cmax_wd_feature(struct mpam_props *props)
{
	return mpam_has_feature(mpam_feat_cmax_cmax, props) ||
	       mpam_has_feature(mpam_feat_cmax_cmin, props);
}
/*
 * True when @parent passes @helper and either:
 *  - @child passes @helper too, but the two disagree on @field, or
 *  - @child does not pass @helper and the controls are not aliases.
 * The whole expansion is parenthesised so it can be negated or combined
 * with other operators safely.
 */
#define MISMATCHED_HELPER(parent, child, helper, field, alias)		\
	(helper(parent) &&						\
	 ((helper(child) && (parent)->field != (child)->field) ||	\
	  (!helper(child) && !(alias))))
/*
 * True when @parent has @feat and either:
 *  - @child has @feat too, but the two disagree on @field, or
 *  - @child lacks @feat and the controls are not aliases.
 * The whole expansion is parenthesised so it can be negated or combined
 * with other operators safely.
 */
#define MISMATCHED_FEAT(parent, child, feat, field, alias)			\
	(mpam_has_feature((feat), (parent)) &&					\
	 ((mpam_has_feature((feat), (child)) &&					\
	   (parent)->field != (child)->field) ||				\
	  (!mpam_has_feature((feat), (child)) && !(alias))))
/*
 * True when the controls alias, @parent lacks @feat and @child has it, i.e.
 * the child's value can simply be copied into the parent.
 * The whole expansion is parenthesised so it can be negated or combined
 * with other operators safely.
 */
#define CAN_MERGE_FEAT(parent, child, feat, alias)		\
	((alias) && !mpam_has_feature((feat), (parent)) &&	\
	 mpam_has_feature((feat), (child)))
/*
 * Combine two props fields.
 * If this is for controls that alias the same resource, it is safe to just
 * copy the values over. If two aliasing controls implement the same scheme
 * a safe value must be picked.
 * For non-aliasing controls, these control different resources, and the
 * resulting safe value must be compatible with both. When merging values in
 * the tree, all the aliasing resources must be handled first.
 * On mismatch, parent is modified.
 *
 * @parent: properties that accumulate the merged result (modified in place).
 * @child:  properties being merged in (only read).
 * @alias:  true if @parent and @child describe controls for the same
 *          resource; the feature bitmaps are then OR'd together, otherwise
 *          they are AND'd.
 *
 * The debug messages do not print __func__ themselves: pr_fmt() for this
 * file already prefixes every message with it.
 */
static void __props_mismatch(struct mpam_props *parent,
			     struct mpam_props *child, bool alias)
{
	if (CAN_MERGE_FEAT(parent, child, mpam_feat_cpor_part, alias)) {
		parent->cpbm_wd = child->cpbm_wd;
	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_cpor_part,
				   cpbm_wd, alias)) {
		pr_debug("cleared cpor_part\n");
		mpam_clear_feature(mpam_feat_cpor_part, parent);
		parent->cpbm_wd = 0;
	}

	if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_part, alias)) {
		parent->mbw_pbm_bits = child->mbw_pbm_bits;
	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_part,
				   mbw_pbm_bits, alias)) {
		pr_debug("cleared mbw_part\n");
		mpam_clear_feature(mpam_feat_mbw_part, parent);
		parent->mbw_pbm_bits = 0;
	}

	/* bwa_wd is a count of bits, fewer bits means less precision */
	if (alias && !mpam_has_bwa_wd_feature(parent) &&
	    mpam_has_bwa_wd_feature(child)) {
		parent->bwa_wd = child->bwa_wd;
	} else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature,
				     bwa_wd, alias)) {
		pr_debug("took the min bwa_wd\n");
		parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd);
	}

	if (alias && !mpam_has_cmax_wd_feature(parent) &&
	    mpam_has_cmax_wd_feature(child)) {
		parent->cmax_wd = child->cmax_wd;
	} else if (MISMATCHED_HELPER(parent, child, mpam_has_cmax_wd_feature,
				     cmax_wd, alias)) {
		pr_debug("took the min cmax_wd\n");
		parent->cmax_wd = min(parent->cmax_wd, child->cmax_wd);
	}

	if (CAN_MERGE_FEAT(parent, child, mpam_feat_cmax_cassoc, alias)) {
		parent->cassoc_wd = child->cassoc_wd;
	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_cmax_cassoc,
				   cassoc_wd, alias)) {
		pr_debug("cleared cassoc_wd\n");
		mpam_clear_feature(mpam_feat_cmax_cassoc, parent);
		parent->cassoc_wd = 0;
	}

	/* For num properties, take the minimum */
	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) {
		parent->num_csu_mon = child->num_csu_mon;
	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_csu,
				   num_csu_mon, alias)) {
		pr_debug("took the min num_csu_mon\n");
		parent->num_csu_mon = min(parent->num_csu_mon,
					  child->num_csu_mon);
	}

	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_mbwu, alias)) {
		parent->num_mbwu_mon = child->num_mbwu_mon;
	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_mbwu,
				   num_mbwu_mon, alias)) {
		pr_debug("took the min num_mbwu_mon\n");
		parent->num_mbwu_mon = min(parent->num_mbwu_mon,
					   child->num_mbwu_mon);
	}

	if (CAN_MERGE_FEAT(parent, child, mpam_feat_intpri_part, alias)) {
		parent->intpri_wd = child->intpri_wd;
	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_intpri_part,
				   intpri_wd, alias)) {
		pr_debug("took the min intpri_wd\n");
		parent->intpri_wd = min(parent->intpri_wd, child->intpri_wd);
	}

	if (CAN_MERGE_FEAT(parent, child, mpam_feat_dspri_part, alias)) {
		parent->dspri_wd = child->dspri_wd;
	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_dspri_part,
				   dspri_wd, alias)) {
		pr_debug("took the min dspri_wd\n");
		parent->dspri_wd = min(parent->dspri_wd, child->dspri_wd);
	}

	/* TODO: alias support for these two */
	/* {int,ds}pri may not have differing 0-low behaviour */
	if (mpam_has_feature(mpam_feat_intpri_part, parent) &&
	    (!mpam_has_feature(mpam_feat_intpri_part, child) ||
	     mpam_has_feature(mpam_feat_intpri_part_0_low, parent) !=
	     mpam_has_feature(mpam_feat_intpri_part_0_low, child))) {
		pr_debug("cleared intpri_part\n");
		mpam_clear_feature(mpam_feat_intpri_part, parent);
		mpam_clear_feature(mpam_feat_intpri_part_0_low, parent);
	}

	if (mpam_has_feature(mpam_feat_dspri_part, parent) &&
	    (!mpam_has_feature(mpam_feat_dspri_part, child) ||
	     mpam_has_feature(mpam_feat_dspri_part_0_low, parent) !=
	     mpam_has_feature(mpam_feat_dspri_part_0_low, child))) {
		pr_debug("cleared dspri_part\n");
		mpam_clear_feature(mpam_feat_dspri_part, parent);
		mpam_clear_feature(mpam_feat_dspri_part_0_low, parent);
	}

	if (alias) {
		/* Merge features for aliased resources */
		bitmap_or(parent->features, parent->features,
			  child->features, MPAM_FEATURE_LAST);
	} else {
		/* Clear missing features for non aliasing */
		bitmap_and(parent->features, parent->features,
			   child->features, MPAM_FEATURE_LAST);
	}
}
/*
* If a vmsc doesn't match class feature/configuration, do the right thing(tm).
* For 'num' properties we can just take the minimum.
* For properties where the mismatched unused bits would make a difference, we
* nobble the class feature, as we can't configure all the resources.
* e.g. The L3 cache is composed of two resources with 13 and 17 portion
* bitmaps respectively.
*/
static void
__class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc)
{
struct mpam_props *cprops = &class->props;
struct mpam_props *vprops = &vmsc->props;
struct device *dev = &vmsc->msc->pdev->dev;
lockdep_assert_held(&mpam_list_lock); /* we modify class */
dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n",
(long)cprops->features, (long)vprops->features);
/* Take the safe value for any common features */
__props_mismatch(cprops, vprops, false);
}
static void
__vmsc_props_mismatch(struct mpam_vmsc *vmsc, struct mpam_msc_ris *ris)
{
struct mpam_props *rprops = &ris->props;
struct mpam_props *vprops = &vmsc->props;
struct device *dev = &vmsc->msc->pdev->dev;
lockdep_assert_held(&mpam_list_lock); /* we modify vmsc */
dev_dbg(dev, "Merging features for vmsc:0x%lx |= ris:0x%lx\n",
(long)vprops->features, (long)rprops->features);
/*
* Merge mismatched features - Copy any features that aren't common,
* but take the safe value for any common features.
*/
__props_mismatch(vprops, rprops, true);
}
/*
 * Seed the class properties from the first vMSC of the first component.
 * Conflicts with the other vMSCs are removed afterwards by
 * __class_props_mismatch().
 * A class always has at least one component, and a component at least one
 * resource, and the vMSC properties are already built when this runs.
 */
static void mpam_enable_init_class_features(struct mpam_class *class)
{
	struct mpam_component *first_comp;
	struct mpam_vmsc *first_vmsc;

	first_comp = list_first_entry(&class->components,
				      struct mpam_component, class_list);
	first_vmsc = list_first_entry(&first_comp->vmsc,
				      struct mpam_vmsc, comp_list);

	class->props = first_vmsc->props;
}
/*
 * For every vMSC of @comp, merge the properties of each of its RIS into the
 * vMSC. While walking, raise the class's nrdy_usec to the largest value
 * reported by any MSC that was visited.
 */
static void mpam_enable_merge_vmsc_features(struct mpam_component *comp)
{
	struct mpam_class *cls = comp->class;
	struct mpam_vmsc *v;

	list_for_each_entry(v, &comp->vmsc, comp_list) {
		struct mpam_msc_ris *r;

		list_for_each_entry(r, &v->ris, vmsc_list) {
			__vmsc_props_mismatch(v, r);
			cls->nrdy_usec = max(cls->nrdy_usec,
					     v->msc->nrdy_usec);
		}
	}
}
static void mpam_enable_merge_class_features(struct mpam_component *comp)
{
struct mpam_vmsc *vmsc;
struct mpam_class *class = comp->class;
list_for_each_entry(vmsc, &comp->vmsc, comp_list)
__class_props_mismatch(class, vmsc);
}
/*
 * Build the class features from the features of all its resources.
 *
 * Step 1: mpam_enable_merge_vmsc_features() bitwise-or's the RIS features
 *         into each vmsc, so that a representative set of features exists.
 * Step 2: mpam_enable_init_class_features() initialises the class from one
 *         of those vmsc.
 * Step 3: mpam_enable_merge_class_features() bitwise-and's every vmsc's
 *         features into the class.
 * Other properties are the min/max as appropriate.
 *
 * class->nrdy_usec is a max() and needs no initial value, so it is updated
 * during step 1 to avoid walking the whole tree a second time.
 */
static void mpam_enable_merge_features(struct list_head *all_classes_list)
{
	struct mpam_class *cls;

	lockdep_assert_held(&mpam_list_lock);

	list_for_each_entry(cls, all_classes_list, classes_list) {
		struct mpam_component *c;

		list_for_each_entry(c, &cls->components, class_list) {
			mpam_enable_merge_vmsc_features(c);
		}

		mpam_enable_init_class_features(cls);

		list_for_each_entry(c, &cls->components, class_list) {
			mpam_enable_merge_class_features(c);
		}
	}
}
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
/*
 * Human readable names for the error codes, indexed by error code.
 * Neither the table nor the strings are ever modified, so both are const.
 */
static const char * const mpam_errcode_names[16] = {
	[MPAM_ERRCODE_NONE]			= "No error",
	[MPAM_ERRCODE_PARTID_SEL_RANGE]		= "PARTID_SEL_Range",
	[MPAM_ERRCODE_REQ_PARTID_RANGE]		= "Req_PARTID_Range",
	[MPAM_ERRCODE_MSMONCFG_ID_RANGE]	= "MSMONCFG_ID_RANGE",
	[MPAM_ERRCODE_REQ_PMG_RANGE]		= "Req_PMG_Range",
	[MPAM_ERRCODE_MONITOR_RANGE]		= "Monitor_Range",
	[MPAM_ERRCODE_INTPARTID_RANGE]		= "intPARTID_Range",
	[MPAM_ERRCODE_UNEXPECTED_INTERNAL]	= "Unexpected_INTERNAL",
	[MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL]	= "Undefined_RIS_PART_SEL",
	[MPAM_ERRCODE_RIS_NO_CONTROL]		= "RIS_No_Control",
	[MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL]	= "Undefined_RIS_MON_SEL",
	[MPAM_ERRCODE_RIS_NO_MONITOR]		= "RIS_No_Monitor",
	[12 ... 15]				= "Reserved"
};
/*
 * Write MPAMF_ECR_INTEN to the MSC's MPAMF_ECR register.
 * @_msc is the struct mpam_msc to operate on. Always returns 0.
 */
static int mpam_enable_msc_ecr(void *_msc)
{
	struct mpam_msc *target = _msc;

	__mpam_write_reg(target, MPAMF_ECR, MPAMF_ECR_INTEN);

	return 0;
}
/*
 * Write zero to the MSC's MPAMF_ECR register.
 * This can run in mpam_disable(), and the interrupt handler on the same CPU.
 * @_msc is the struct mpam_msc to operate on. Always returns 0.
 */
static int mpam_disable_msc_ecr(void *_msc)
{
	struct mpam_msc *target = _msc;

	__mpam_write_reg(target, MPAMF_ECR, 0);

	return 0;
}
static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc)
{
u64 reg;
u16 partid;
u8 errcode, pmg, ris;
if (WARN_ON_ONCE(!msc) ||
WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
&msc->accessibility)))
return IRQ_NONE;
reg = mpam_msc_read_esr(msc);
errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg);
if (!errcode)
return IRQ_NONE;
/* Clear level triggered irq */
mpam_msc_clear_esr(msc);
partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg);
pmg = FIELD_GET(MPAMF_ESR_PMG, reg);
ris = FIELD_GET(MPAMF_ESR_RIS, reg);
pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n",
msc->id, mpam_errcode_names[errcode], partid, pmg,
ris);
/* Disable this interrupt. */
mpam_disable_msc_ecr(msc);
/* Are we racing with the thread disabling MPAM? */
if (!mpam_is_enabled())
return IRQ_HANDLED;
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
/*
* Schedule the teardown work. Don't use a threaded IRQ as we can't
* unregister the interrupt from the threaded part of the handler.
*/
mpam_disable_reason = "hardware error interrupt";
schedule_work(&mpam_broken_work);
return IRQ_HANDLED;
}
/*
 * Handler for the per-cpu flavour of the error interrupt: @dev_id points at
 * a pointer to the MSC, so it needs one extra dereference.
 */
static irqreturn_t mpam_ppi_handler(int irq, void *dev_id)
{
	struct mpam_msc **slot = dev_id;

	return __mpam_irq_handler(irq, *slot);
}
/*
 * Handler for the non per-cpu flavour of the error interrupt: @dev_id is the
 * MSC itself.
 */
static irqreturn_t mpam_spi_handler(int irq, void *dev_id)
{
	struct mpam_msc *owner = dev_id;

	return __mpam_irq_handler(irq, owner);
}
/*
 * Request the optional "error" interrupt of every MSC and enable error
 * reporting in the MSC.
 *
 * MSCs without an "error" interrupt are skipped. Returns 0 on success, or
 * the error from the first failing request; interrupts requested before the
 * failure are left registered.
 *
 * The caller must hold the cpus read lock.
 */
static int mpam_register_irqs(void)
{
	struct mpam_msc *msc;

	lockdep_assert_cpus_held();

	guard(srcu)(&mpam_srcu);
	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		int ret;
		int irq = platform_get_irq_byname_optional(msc->pdev, "error");

		if (irq <= 0)
			continue;

		/* The MPAM spec says the interrupt can be SPI, PPI or LPI */
		/* We anticipate sharing the interrupt with other MSCs */
		if (!irq_is_percpu(irq)) {
			ret = devm_request_irq(&msc->pdev->dev, irq,
					       &mpam_spi_handler, IRQF_SHARED,
					       "mpam:msc:error", msc);
			if (ret)
				return ret;
		} else {
			ret = request_percpu_irq(irq, &mpam_ppi_handler,
						 "mpam:msc:error",
						 msc->error_dev_id);
			if (ret)
				return ret;

			msc->reenable_error_ppi = irq;
			/*
			 * NOTE(review): smp_call_function_many() appears not
			 * to run the callback on the calling CPU - confirm
			 * that is intended if this CPU is in the MSC's
			 * accessibility mask.
			 */
			smp_call_function_many(&msc->accessibility,
					       &_enable_percpu_irq, &irq,
					       true);
		}

		mutex_lock(&msc->error_irq_lock);
		msc->error_irq_req = true;
		mpam_touch_msc(msc, mpam_enable_msc_ecr, msc);
		msc->error_irq_hw_enabled = true;
		mutex_unlock(&msc->error_irq_lock);
	}

	return 0;
}
/*
 * Undo mpam_register_irqs(): for every MSC with an "error" interrupt, turn
 * error reporting off in the MSC if it was enabled, and free the interrupt
 * if it was requested. The per-MSC flags record which steps are needed.
 */
static void mpam_unregister_irqs(void)
{
	struct mpam_msc *msc;

	guard(cpus_read_lock)();
	guard(srcu)(&mpam_srcu);
	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		int irq = platform_get_irq_byname_optional(msc->pdev, "error");

		if (irq <= 0)
			continue;

		mutex_lock(&msc->error_irq_lock);

		if (msc->error_irq_hw_enabled) {
			mpam_touch_msc(msc, mpam_disable_msc_ecr, msc);
			msc->error_irq_hw_enabled = false;
		}

		if (msc->error_irq_req) {
			if (!irq_is_percpu(irq)) {
				devm_free_irq(&msc->pdev->dev, irq, msc);
			} else {
				msc->reenable_error_ppi = 0;
				free_percpu_irq(irq, msc->error_dev_id);
			}
			msc->error_irq_req = false;
		}

		mutex_unlock(&msc->error_irq_lock);
	}
}
/*
 * Hand the component's configuration array, and the mbwu_state of every RIS
 * below it, to the garbage list. A vMSC's RIS are only visited if its MSC's
 * mon_sel lock could be taken.
 *
 * Caller must hold mpam_list_lock.
 */
static void __destroy_component_cfg(struct mpam_component *comp)
{
	struct mpam_vmsc *vmsc;

	lockdep_assert_held(&mpam_list_lock);

	add_to_garbage(comp->cfg);

	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
		struct mpam_msc *msc = vmsc->msc;
		struct mpam_msc_ris *ris;

		if (!mpam_mon_sel_lock(msc))
			continue;

		list_for_each_entry(ris, &vmsc->ris, vmsc_list)
			add_to_garbage(ris->mbwu_state);

		mpam_mon_sel_unlock(msc);
	}
}
/*
 * Put every PARTID's configuration for @comp back to its default: the entry
 * is zeroed, then each control whose width is non-zero in the class
 * properties gets a mask built from that width.
 * Does nothing if the configuration array has not been allocated.
 */
static void mpam_reset_component_cfg(struct mpam_component *comp)
{
	struct mpam_props *class_props = &comp->class->props;
	int partid;

	mpam_assert_partid_sizes_fixed();

	if (!comp->cfg)
		return;

	for (partid = 0; partid <= mpam_partid_max; partid++) {
		struct mpam_config *cfg = &comp->cfg[partid];

		*cfg = (struct mpam_config) {};

		if (class_props->cpbm_wd)
			cfg->cpbm = GENMASK(class_props->cpbm_wd - 1, 0);

		if (class_props->mbw_pbm_bits)
			cfg->mbw_pbm = GENMASK(class_props->mbw_pbm_bits - 1, 0);

		if (class_props->bwa_wd)
			cfg->mbw_max = GENMASK(15, 16 - class_props->bwa_wd);
	}
}
/*
 * Allocate the per-PARTID configuration array for @comp, and the mbwu_state
 * array for every RIS below it that has mbwu monitors.
 *
 * Returns 0 on success, or if the configuration array already exists.
 * Returns -ENOMEM if an allocation fails; in that case everything allocated
 * for the component so far is passed to __destroy_component_cfg().
 */
static int __allocate_component_cfg(struct mpam_component *comp)
{
	struct mpam_vmsc *vmsc;

	mpam_assert_partid_sizes_fixed();

	if (comp->cfg)
		return 0;

	comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL);
	if (!comp->cfg)
		return -ENOMEM;

	/*
	 * The array is free()d in one go, so only cfg[0]'s structure needs
	 * to be initialised.
	 */
	init_garbage(&comp->cfg[0].garbage);

	mpam_reset_component_cfg(comp);

	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
		struct mpam_msc *msc;
		struct mpam_msc_ris *ris;
		struct msmon_mbwu_state *mbwu_state;

		if (!vmsc->props.num_mbwu_mon)
			continue;

		msc = vmsc->msc;
		list_for_each_entry(ris, &vmsc->ris, vmsc_list) {
			if (!ris->props.num_mbwu_mon)
				continue;

			mbwu_state = kcalloc(ris->props.num_mbwu_mon,
					     sizeof(*ris->mbwu_state),
					     GFP_KERNEL);
			if (!mbwu_state) {
				__destroy_component_cfg(comp);
				return -ENOMEM;
			}

			init_garbage(&mbwu_state[0].garbage);

			if (mpam_mon_sel_lock(msc)) {
				ris->mbwu_state = mbwu_state;
				mpam_mon_sel_unlock(msc);
			} else {
				/*
				 * The array was never published, so nothing
				 * else can free it: don't leak it.
				 */
				kfree(mbwu_state);
			}
		}
	}

	return 0;
}
/*
 * Allocate the configuration for every component of every class.
 * Stops at, and returns, the first error; returns 0 if all succeeded.
 *
 * Caller must hold mpam_list_lock.
 */
static int mpam_allocate_config(void)
{
	struct mpam_class *cls;

	lockdep_assert_held(&mpam_list_lock);

	list_for_each_entry(cls, &mpam_classes, classes_list) {
		struct mpam_component *c;

		list_for_each_entry(c, &cls->components, class_list) {
			int ret = __allocate_component_cfg(c);

			if (ret)
				return ret;
		}
	}

	return 0;
}
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
static void mpam_enable_once(void)
{
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
int err;
/*
* Once the cpuhp callbacks have been changed, mpam_partid_max can no
* longer change.
*/
spin_lock(&partid_max_lock);
partid_max_published = true;
spin_unlock(&partid_max_lock);
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
/*
* If all the MSC have been probed, enabling the IRQs happens next.
* That involves cross-calling to a CPU that can reach the MSC, and
* the locks must be taken in this order:
*/
cpus_read_lock();
mutex_lock(&mpam_list_lock);
do {
mpam_enable_merge_features(&mpam_classes);
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
err = mpam_register_irqs();
if (err) {
pr_warn("Failed to register irqs: %d\n", err);
break;
}
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
err = mpam_allocate_config();
if (err) {
pr_err("Failed to allocate configuration arrays.\n");
break;
}
} while (0);
mutex_unlock(&mpam_list_lock);
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
cpus_read_unlock();
if (err) {
mpam_disable_reason = "Failed to enable.";
schedule_work(&mpam_broken_work);
return;
}
static_branch_enable(&mpam_enabled);
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline,
"mpam:online");
/* Use printk() to avoid the pr_fmt adding the function name. */
printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n",
mpam_partid_max + 1, mpam_pmg_max + 1);
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
}
/*
 * Reset every RIS reachable from @comp that is not already flagged as
 * being in its reset state.
 *
 * mpam_reset_component_cfg() is called on the component first. Each RIS
 * is then handed to mpam_reset_ris() via mpam_touch_msc() unless its
 * in_reset_state flag is already set, and the flag is set afterwards.
 *
 * The caller must hold the CPU hotplug lock (asserted below). The list
 * walk runs under an SRCU read-side section taken here.
 */
static void mpam_reset_component_locked(struct mpam_component *comp)
{
	struct mpam_vmsc *cur_vmsc;

	lockdep_assert_cpus_held();
	mpam_assert_partid_sizes_fixed();

	mpam_reset_component_cfg(comp);

	/* Released automatically when the function returns. */
	guard(srcu)(&mpam_srcu);

	list_for_each_entry_srcu(cur_vmsc, &comp->vmsc, comp_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		struct mpam_msc_ris *cur_ris;

		list_for_each_entry_srcu(cur_ris, &cur_vmsc->ris, vmsc_list,
					 srcu_read_lock_held(&mpam_srcu)) {
			/* Only touch hardware that may hold non-reset values. */
			if (!cur_ris->in_reset_state) {
				mpam_touch_msc(cur_vmsc->msc, mpam_reset_ris,
					       cur_ris);
			}

			cur_ris->in_reset_state = true;
		}
	}
}
static void mpam_reset_class_locked(struct mpam_class *class)
{
struct mpam_component *comp;
lockdep_assert_cpus_held();
guard(srcu)(&mpam_srcu);
list_for_each_entry_srcu(comp, &class->components, class_list,
srcu_read_lock_held(&mpam_srcu))
mpam_reset_component_locked(comp);
}
static void mpam_reset_class(struct mpam_class *class)
{
cpus_read_lock();
mpam_reset_class_locked(class);
cpus_read_unlock();
}
/*
* Called in response to an error IRQ.
* All of MPAMs errors indicate a software bug, restore any modified
* controls to their reset values.
*/
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
void mpam_disable(struct work_struct *ignored)
{
int idx;
struct mpam_class *class;
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
struct mpam_msc *msc, *tmp;
mutex_lock(&mpam_cpuhp_state_lock);
if (mpam_cpuhp_state) {
cpuhp_remove_state(mpam_cpuhp_state);
mpam_cpuhp_state = 0;
}
mutex_unlock(&mpam_cpuhp_state_lock);
static_branch_disable(&mpam_enabled);
arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew <rohit.mathew@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Rohit Mathew <rohit.mathew@arm.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:51 +00:00
mpam_unregister_irqs();
idx = srcu_read_lock(&mpam_srcu);
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
srcu_read_lock_held(&mpam_srcu))
mpam_reset_class(class);
srcu_read_unlock(&mpam_srcu, idx);
arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen <lecopzerc@nvidia.com> Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Fenghua Yu <fenghuay@nvidia.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2025-11-19 12:22:43 +00:00
mutex_lock(&mpam_list_lock);
list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list)
mpam_msc_destroy(msc);
mutex_unlock(&mpam_list_lock);
mpam_free_garbage();
pr_err_once("MPAM disabled due to %s\n", mpam_disable_reason);
}
/*
 * Enable MPAM once every device has been probed.
 * Scheduled by mpam_discovery_cpu_online() once all devices have been
 * created, and again whenever a newly onlined CPU lets more devices be
 * probed.
 *
 * mpam_enable_once() is invoked at most one time, by the first call that
 * finds every MSC on mpam_all_msc marked as probed.
 */
void mpam_enable(struct work_struct *work)
{
	static atomic_t once;
	struct mpam_msc *msc;

	/* Held until return, including across mpam_enable_once(). */
	guard(srcu)(&mpam_srcu);

	/* Bail out as soon as one hardware device is still unprobed. */
	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		bool probed;

		mutex_lock(&msc->probe_lock);
		probed = msc->probed;
		mutex_unlock(&msc->probe_lock);

		if (!probed)
			return;
	}

	/* Only the caller that sees the counter at zero does the enable. */
	if (atomic_fetch_inc(&once) == 0)
		mpam_enable_once();
}
/*
 * Copy one control value from @newcfg into @cfg if it needs updating.
 *
 * Nothing happens unless @newcfg has @feature and its @member differs
 * from the one stored in @cfg. Otherwise the value is copied, @feature is
 * set on @cfg and @changes is set to true. @changes is never cleared
 * here, so it accumulates across several invocations.
 */
#define maybe_update_config(cfg, feature, newcfg, member, changes) do {	\
	if (mpam_has_feature(feature, newcfg)) {			\
		if ((newcfg)->member != (cfg)->member) {		\
			(cfg)->member = (newcfg)->member;		\
			mpam_set_feature(feature, cfg);			\
									\
			(changes) = true;				\
		}							\
	}								\
} while (0)
/*
 * Merge the controls carried by @newcfg into @cfg.
 *
 * The cache portion bitmap, the memory bandwidth portion bitmap and the
 * memory bandwidth maximum are each considered in turn.
 *
 * Return: true if at least one value in @cfg was changed, false otherwise.
 */
static bool mpam_update_config(struct mpam_config *cfg,
			       const struct mpam_config *newcfg)
{
	bool changed = false;

	maybe_update_config(cfg, mpam_feat_cpor_part, newcfg, cpbm, changed);
	maybe_update_config(cfg, mpam_feat_mbw_part, newcfg, mbw_pbm, changed);
	maybe_update_config(cfg, mpam_feat_mbw_max, newcfg, mbw_max, changed);

	return changed;
}
/*
 * Record @cfg as the configuration of @partid on @comp and, if anything
 * changed, write it out to every RIS of every vMSC in the component.
 *
 * @cfg must not be the component's own stored entry for @partid: a
 * warning is raised once if it is. The caller must hold the CPU hotplug
 * lock (asserted below).
 *
 * Return: always 0.
 */
int mpam_apply_config(struct mpam_component *comp, u16 partid,
		      struct mpam_config *cfg)
{
	struct mpam_write_config_arg write_arg;
	struct mpam_vmsc *cur_vmsc;
	struct mpam_msc_ris *cur_ris;

	lockdep_assert_cpus_held();

	/* Don't pass in the current config! */
	WARN_ON_ONCE(&comp->cfg[partid] == cfg);

	/* Nothing differs from what is already stored: skip the hardware. */
	if (!mpam_update_config(&comp->cfg[partid], cfg))
		return 0;

	write_arg.comp = comp;
	write_arg.partid = partid;

	guard(srcu)(&mpam_srcu);

	list_for_each_entry_srcu(cur_vmsc, &comp->vmsc, comp_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		struct mpam_msc *msc = cur_vmsc->msc;

		/* cfg_lock is held across all RIS writes for this MSC. */
		mutex_lock(&msc->cfg_lock);
		list_for_each_entry_srcu(cur_ris, &cur_vmsc->ris, vmsc_list,
					 srcu_read_lock_held(&mpam_srcu)) {
			write_arg.ris = cur_ris;
			mpam_touch_msc(msc, __write_config, &write_arg);
		}
		mutex_unlock(&msc->cfg_lock);
	}

	return 0;
}
/*
 * Initcall that sets up the MPAM MSC platform driver.
 *
 * The SRCU domain is initialised before the driver is registered, and is
 * cleaned up again on every failure path after that point.
 *
 * Return: 0 on success, -EOPNOTSUPP if the system does not support MPAM,
 * -EINVAL if firmware describes no MSC devices, or the error returned by
 * init_srcu_struct() or platform_driver_register().
 */
static int __init mpam_msc_driver_init(void)
{
	int err;

	if (!system_supports_mpam())
		return -EOPNOTSUPP;

	/* init_srcu_struct() allocates and can fail: don't ignore it. */
	err = init_srcu_struct(&mpam_srcu);
	if (err)
		return err;

	fw_num_msc = acpi_mpam_count_msc();
	if (fw_num_msc <= 0) {
		pr_err("No MSC devices found in firmware\n");
		err = -EINVAL;
		goto err_cleanup_srcu;
	}

	err = platform_driver_register(&mpam_msc_driver);
	if (err)
		goto err_cleanup_srcu;

	return 0;

err_cleanup_srcu:
	/*
	 * NOTE(review): assumes nothing is still using mpam_srcu when
	 * registration fails - confirm against the probe path.
	 */
	cleanup_srcu_struct(&mpam_srcu);
	return err;
}
/*
 * Registered at subsys_initcall level: this must occur after
 * arm64_mpam_register_cpus(), which runs from arch_initcall().
 */
subsys_initcall(mpam_msc_driver_init);
/*
 * When the KUnit tests are configured in, their source is textually
 * included here so it is compiled as part of this translation unit.
 */
#ifdef CONFIG_MPAM_KUNIT_TEST
#include "test_mpam_devices.c"
#endif