Centos-kernel-stream-9/drivers/infiniband/hw/mlx5/counters.c

1085 lines
30 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#include "mlx5_ib.h"
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "counters.h"
#include "ib_rep.h"
#include "qp.h"
struct mlx5_ib_counter {
const char *name;
size_t offset;
u32 type;
};
#define INIT_Q_COUNTER(_name) \
{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
#define INIT_VPORT_Q_COUNTER(_name) \
{ .name = "vport_" #_name, .offset = \
MLX5_BYTE_OFF(query_q_counter_out, _name)}
static const struct mlx5_ib_counter basic_q_cnts[] = {
INIT_Q_COUNTER(rx_write_requests),
INIT_Q_COUNTER(rx_read_requests),
INIT_Q_COUNTER(rx_atomic_requests),
INIT_Q_COUNTER(rx_dct_connect),
INIT_Q_COUNTER(out_of_buffer),
};
static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
INIT_Q_COUNTER(out_of_sequence),
};
static const struct mlx5_ib_counter retrans_q_cnts[] = {
INIT_Q_COUNTER(duplicate_request),
INIT_Q_COUNTER(rnr_nak_retry_err),
INIT_Q_COUNTER(packet_seq_err),
INIT_Q_COUNTER(implied_nak_seq_err),
INIT_Q_COUNTER(local_ack_timeout_err),
};
static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
INIT_VPORT_Q_COUNTER(rx_write_requests),
INIT_VPORT_Q_COUNTER(rx_read_requests),
INIT_VPORT_Q_COUNTER(rx_atomic_requests),
INIT_VPORT_Q_COUNTER(rx_dct_connect),
INIT_VPORT_Q_COUNTER(out_of_buffer),
};
static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
INIT_VPORT_Q_COUNTER(out_of_sequence),
};
static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
INIT_VPORT_Q_COUNTER(duplicate_request),
INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
INIT_VPORT_Q_COUNTER(packet_seq_err),
INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
};
#define INIT_CONG_COUNTER(_name) \
{ .name = #_name, .offset = \
MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
static const struct mlx5_ib_counter cong_cnts[] = {
INIT_CONG_COUNTER(rp_cnp_ignored),
INIT_CONG_COUNTER(rp_cnp_handled),
INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
INIT_CONG_COUNTER(np_cnp_sent),
};
static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(resp_local_length_error),
INIT_Q_COUNTER(resp_cqe_error),
INIT_Q_COUNTER(req_cqe_error),
INIT_Q_COUNTER(req_remote_invalid_request),
INIT_Q_COUNTER(req_remote_access_errors),
INIT_Q_COUNTER(resp_remote_access_errors),
INIT_Q_COUNTER(resp_cqe_flush_error),
INIT_Q_COUNTER(req_cqe_flush_error),
INIT_Q_COUNTER(req_transport_retries_exceeded),
INIT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter roce_accl_cnts[] = {
INIT_Q_COUNTER(roce_adp_retrans),
INIT_Q_COUNTER(roce_adp_retrans_to),
INIT_Q_COUNTER(roce_slow_restart),
INIT_Q_COUNTER(roce_slow_restart_cnps),
INIT_Q_COUNTER(roce_slow_restart_trans),
};
static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
INIT_VPORT_Q_COUNTER(resp_local_length_error),
INIT_VPORT_Q_COUNTER(resp_cqe_error),
INIT_VPORT_Q_COUNTER(req_cqe_error),
INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
INIT_VPORT_Q_COUNTER(req_remote_access_errors),
INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
INIT_VPORT_Q_COUNTER(roce_adp_retrans),
INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
INIT_VPORT_Q_COUNTER(roce_slow_restart),
INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
};
#define INIT_EXT_PPCNT_COUNTER(_name) \
{ .name = #_name, .offset = \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
#define INIT_OP_COUNTER(_name, _type) \
{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
static const struct mlx5_ib_counter basic_op_cnts[] = {
INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
};
static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
};
static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
struct mlx5_read_counters_attr mread_attr = {};
struct mlx5_ib_flow_counters_desc *desc;
int ret, i;
mutex_lock(&mcounters->mcntrs_mutex);
if (mcounters->cntrs_max_index > read_attr->ncounters) {
ret = -EINVAL;
goto err_bound;
}
mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
GFP_KERNEL);
if (!mread_attr.out) {
ret = -ENOMEM;
goto err_bound;
}
mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
mread_attr.flags = read_attr->flags;
ret = mcounters->read_counters(counters->device, &mread_attr);
if (ret)
goto err_read;
/* do the pass over the counters data array to assign according to the
* descriptions and indexing pairs
*/
desc = mcounters->counters_data;
for (i = 0; i < mcounters->ncounters; i++)
read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
err_read:
kfree(mread_attr.out);
err_bound:
mutex_unlock(&mcounters->mcntrs_mutex);
return ret;
}
static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
mlx5_ib_counters_clear_description(counters);
if (mcounters->hw_cntrs_hndl)
mlx5_fc_destroy(to_mdev(counters->device)->mdev,
mcounters->hw_cntrs_hndl);
return 0;
}
static int mlx5_ib_create_counters(struct ib_counters *counters,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
mutex_init(&mcounters->mcntrs_mutex);
return 0;
}
static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
{
return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
}
static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
RDMA: Support more than 255 rdma ports Current code uses many different types when dealing with a port of a RDMA device: u8, unsigned int and u32. Switch to u32 to clean up the logic. This allows us to make (at least) the core view consistent and use the same type. Unfortunately not all places can be converted. Many uverbs functions expect port to be u8 so keep those places in order not to break UAPIs. HW/Spec defined values must also not be changed. With the switch to u32 we now can support devices with more than 255 ports. U32_MAX is reserved to make control logic a bit easier to deal with. As a device with U32_MAX ports probably isn't going to happen any time soon this seems like a non issue. When a device with more than 255 ports is created uverbs will report the RDMA device as having 255 ports as this is the max currently supported. The verbs interface is not changed yet because the IBTA spec limits the port size in too many places to be u8 and all applications that relies in verbs won't be able to cope with this change. At this stage, we are extending the interfaces that are using vendor channel solely Once the limitation is lifted mlx5 in switchdev mode will be able to have thousands of SFs created by the device. As the only instance of an RDMA device that reports more than 255 ports will be a representor device and it exposes itself as a RAW Ethernet only device CM/MAD/IPoIB and other ULPs aren't effected by this change and their sysfs/interfaces that are exposes to userspace can remain unchanged. While here cleanup some alignment issues and remove unneeded sanity checks (mainly in rdmavt), Link: https://lore.kernel.org/r/20210301070420.439400-1-leon@kernel.org Signed-off-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2021-03-01 07:04:20 +00:00
u32 port_num)
{
if ((is_mdev_switchdev_mode(dev->mdev) &&
!vport_qcounters_supported(dev)) || !port_num)
return &dev->port[0].cnts;
return is_mdev_switchdev_mode(dev->mdev) ?
&dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}
/**
* mlx5_ib_get_counters_id - Returns counters id to use for device+port
* @dev: Pointer to mlx5 IB device
* @port_num: Zero based port number
*
* mlx5_ib_get_counters_id() Returns counters set id to use for given
* device port combination in switchdev and non switchdev mode of the
* parent device.
*/
RDMA: Support more than 255 rdma ports Current code uses many different types when dealing with a port of a RDMA device: u8, unsigned int and u32. Switch to u32 to clean up the logic. This allows us to make (at least) the core view consistent and use the same type. Unfortunately not all places can be converted. Many uverbs functions expect port to be u8 so keep those places in order not to break UAPIs. HW/Spec defined values must also not be changed. With the switch to u32 we now can support devices with more than 255 ports. U32_MAX is reserved to make control logic a bit easier to deal with. As a device with U32_MAX ports probably isn't going to happen any time soon this seems like a non issue. When a device with more than 255 ports is created uverbs will report the RDMA device as having 255 ports as this is the max currently supported. The verbs interface is not changed yet because the IBTA spec limits the port size in too many places to be u8 and all applications that relies in verbs won't be able to cope with this change. At this stage, we are extending the interfaces that are using vendor channel solely Once the limitation is lifted mlx5 in switchdev mode will be able to have thousands of SFs created by the device. As the only instance of an RDMA device that reports more than 255 ports will be a representor device and it exposes itself as a RAW Ethernet only device CM/MAD/IPoIB and other ULPs aren't effected by this change and their sysfs/interfaces that are exposes to userspace can remain unchanged. While here cleanup some alignment issues and remove unneeded sanity checks (mainly in rdmavt), Link: https://lore.kernel.org/r/20210301070420.439400-1-leon@kernel.org Signed-off-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2021-03-01 07:04:20 +00:00
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
{
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
return cnts->set_id;
}
static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
{
struct rdma_hw_stats *stats;
u32 num_hw_counters;
int i;
num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters;
stats = rdma_alloc_hw_stats_struct(cnts->descs,
num_hw_counters +
cnts->num_op_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
if (!stats)
return NULL;
for (i = 0; i < cnts->num_op_counters; i++)
set_bit(num_hw_counters + i, stats->is_disabled);
return stats;
}
static struct rdma_hw_stats *
mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
return do_alloc_stats(cnts);
}
static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
return do_alloc_stats(cnts);
}
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
const struct mlx5_ib_counters *cnts,
struct rdma_hw_stats *stats,
u16 set_id)
{
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
__be32 val;
int ret, i;
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
if (ret)
return ret;
for (i = 0; i < cnts->num_q_counters; i++) {
val = *(__be32 *)((void *)out + cnts->offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
return 0;
}
static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
const struct mlx5_ib_counters *cnts,
struct rdma_hw_stats *stats)
{
int offset = cnts->num_q_counters + cnts->num_cong_counters;
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
int ret, i;
void *out;
out = kvzalloc(sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
MLX5_SET(ppcnt_reg, in, local_port, 1);
MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
0, 0);
if (ret)
goto free;
for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
stats->value[i + offset] =
be64_to_cpup((__be64 *)(out +
cnts->offsets[i + offset]));
free:
kvfree(out);
return ret;
}
static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
u32 port_num,
const struct mlx5_ib_counters *cnts,
struct rdma_hw_stats *stats)
{
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
struct mlx5_core_dev *mdev;
__be32 val;
int ret, i;
if (!dev->port[port_num].rep ||
dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
return 0;
mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
if (!mdev)
return -EOPNOTSUPP;
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
MLX5_SET(query_q_counter_in, in, other_vport, 1);
MLX5_SET(query_q_counter_in, in, vport_number,
dev->port[port_num].rep->vport);
MLX5_SET(query_q_counter_in, in, aggregate, 1);
ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
if (ret)
return ret;
for (i = 0; i < cnts->num_q_counters; i++) {
val = *(__be32 *)((void *)out + cnts->offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
return 0;
}
static int do_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
struct mlx5_core_dev *mdev;
int ret, num_counters;
if (!stats)
return -EINVAL;
num_counters = cnts->num_q_counters +
cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters;
if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
stats);
else
ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
cnts->set_id);
if (ret)
return ret;
/* We don't expose device counters over Vports */
if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
if (ret)
return ret;
}
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
RDMA/mlx5: Fix mlx5_ib_get_hw_stats when used for device Bugzilla: https://bugzilla.redhat.com/2157856 Upstream-status: v6.2-rc3 commit 38b50aa44495d5eb4218f0b82fc2da76505cec53 Author: Shay Drory <shayd@nvidia.com> Date: Wed Dec 28 14:56:09 2022 +0200 RDMA/mlx5: Fix mlx5_ib_get_hw_stats when used for device Currently, when mlx5_ib_get_hw_stats() is used for device (port_num = 0), there is a special handling in order to use the correct counters, but, port_num is being passed down the stack without any change. Also, some functions assume that port_num >=1. As a result, the following oops can occur. BUG: unable to handle page fault for address: ffff89510294f1a8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP CPU: 8 PID: 1382 Comm: devlink Tainted: G W 6.1.0-rc4_for_upstream_base_2022_11_10_16_12 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:_raw_spin_lock+0xc/0x20 Call Trace: <TASK> mlx5_ib_get_native_port_mdev+0x73/0xe0 [mlx5_ib] do_get_hw_stats.constprop.0+0x109/0x160 [mlx5_ib] mlx5_ib_get_hw_stats+0xad/0x180 [mlx5_ib] ib_setup_device_attrs+0xf0/0x290 [ib_core] ib_register_device+0x3bb/0x510 [ib_core] ? atomic_notifier_chain_register+0x67/0x80 __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5r_probe+0xb8/0x150 [mlx5_ib] ? auxiliary_match_id+0x6a/0x90 auxiliary_bus_probe+0x3c/0x70 ? driver_sysfs_add+0x6b/0x90 really_probe+0xcd/0x380 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 ? driver_allows_async_probing+0x60/0x60 ? driver_allows_async_probing+0x60/0x60 bus_for_each_drv+0x7b/0xc0 __device_attach+0xbc/0x200 bus_probe_device+0x87/0xa0 device_add+0x404/0x940 ? dev_set_name+0x53/0x70 __auxiliary_device_add+0x43/0x60 add_adev+0x99/0xe0 [mlx5_core] mlx5_attach_device+0xc8/0x120 [mlx5_core] mlx5_load_one_devl_locked+0xb2/0xe0 [mlx5_core] devlink_reload+0x133/0x250 devlink_nl_cmd_reload+0x480/0x570 ? devlink_nl_pre_doit+0x44/0x2b0 genl_family_rcv_msg_doit.isra.0+0xc2/0x110 genl_rcv_msg+0x180/0x2b0 ? devlink_nl_cmd_region_read_dumpit+0x540/0x540 ? devlink_reload+0x250/0x250 ? devlink_put+0x50/0x50 ? genl_family_rcv_msg_doit.isra.0+0x110/0x110 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1f6/0x2c0 netlink_sendmsg+0x237/0x490 sock_sendmsg+0x33/0x40 __sys_sendto+0x103/0x160 ? handle_mm_fault+0x10e/0x290 ? do_user_addr_fault+0x1c0/0x5f0 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fix it by setting port_num to 1 in order to get device status and remove unused variable. Fixes: aac4492ef23a ("IB/mlx5: Update counter implementation for dual port RoCE") Link: https://lore.kernel.org/r/98b82994c3cd3fa593b8a75ed3f3901e208beb0f.1672231736.git.leonro@nvidia.com Signed-off-by: Shay Drory <shayd@nvidia.com> Reviewed-by: Patrisious Haddad <phaddad@nvidia.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-01-19 11:10:13 +00:00
if (!port_num)
port_num = 1;
mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
if (!mdev) {
/* If port is not affiliated yet, its in down state
* which doesn't have any counters yet, so it would be
* zero. So no need to read from the HCA.
*/
goto done;
}
ret = mlx5_lag_query_cong_counters(dev->mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
cnts->offsets +
cnts->num_q_counters);
mlx5_ib_put_native_port_mdev(dev, port_num);
if (ret)
return ret;
}
done:
return num_counters;
}
static int do_get_op_stat(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts;
const struct mlx5_ib_op_fc *opfcs;
u64 packets = 0, bytes;
u32 type;
int ret;
cnts = get_counters(dev, port_num);
opfcs = cnts->opfcs;
type = *(u32 *)cnts->descs[index].priv;
if (type >= MLX5_IB_OPCOUNTER_MAX)
return -EINVAL;
if (!opfcs[type].fc)
goto out;
ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
&packets, &bytes);
if (ret)
return ret;
out:
stats->value[index] = packets;
return index;
}
static int do_get_op_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts;
int index, ret, num_hw_counters;
cnts = get_counters(dev, port_num);
num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters;
for (index = num_hw_counters;
index < (num_hw_counters + cnts->num_op_counters); index++) {
ret = do_get_op_stat(ibdev, stats, port_num, index);
if (ret != index)
return ret;
}
return cnts->num_op_counters;
}
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port_num, int index)
{
int num_counters, num_hw_counters, num_op_counters;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts;
cnts = get_counters(dev, port_num);
num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters;
num_counters = num_hw_counters + cnts->num_op_counters;
if (index < 0 || index > num_counters)
return -EINVAL;
else if (index > 0 && index < num_hw_counters)
return do_get_hw_stats(ibdev, stats, port_num, index);
else if (index >= num_hw_counters && index < num_counters)
return do_get_op_stat(ibdev, stats, port_num, index);
num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
if (num_hw_counters < 0)
return num_hw_counters;
num_op_counters = do_get_op_stats(ibdev, stats, port_num);
if (num_op_counters < 0)
return num_op_counters;
return num_hw_counters + num_op_counters;
}
static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
return do_alloc_stats(cnts);
}
static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
return mlx5_ib_query_q_counters(dev->mdev, cnts,
counter->stats, counter->id);
}
static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
if (!counter->id)
return 0;
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
struct ib_qp *qp)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
int err;
if (!counter->id) {
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
MLX5_SET(alloc_q_counter_in, in, opcode,
MLX5_CMD_OP_ALLOC_Q_COUNTER);
MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
if (err)
return err;
counter->id =
MLX5_GET(alloc_q_counter_out, out, counter_set_id);
}
err = mlx5_ib_qp_set_counter(qp, counter);
if (err)
goto fail_set_counter;
return 0;
fail_set_counter:
mlx5_ib_counter_dealloc(counter);
counter->id = 0;
return err;
}
static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
{
return mlx5_ib_qp_set_counter(qp, NULL);
}
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
struct rdma_stat_desc *descs, size_t *offsets,
u32 port_num)
{
bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
port_num != MLX5_VPORT_PF;
const struct mlx5_ib_counter *names;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
int j = 0, i, size;
names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
ARRAY_SIZE(basic_q_cnts);
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
offsets[j] = names[i].offset;
}
names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
offsets[j] = names[i].offset;
}
}
if (is_vport)
return;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
descs[j].name = cong_cnts[i].name;
offsets[j] = cong_cnts[i].offset;
}
}
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
descs[j].name = ext_ppcnt_cnts[i].name;
offsets[j] = ext_ppcnt_cnts[i].offset;
}
}
for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
descs[j].name = basic_op_cnts[i].name;
descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
descs[j].priv = &basic_op_cnts[i].type;
}
if (MLX5_CAP_FLOWTABLE(dev->mdev,
ft_field_support_2_nic_receive_rdma.bth_opcode)) {
for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
descs[j].name = rdmarx_cnp_op_cnts[i].name;
descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
}
}
if (MLX5_CAP_FLOWTABLE(dev->mdev,
ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
descs[j].name = rdmatx_cnp_op_cnts[i].name;
descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
}
}
}
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
struct mlx5_ib_counters *cnts, u32 port_num)
{
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
port_num != MLX5_VPORT_PF;
u32 num_counters, num_op_counters = 0, size;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
ARRAY_SIZE(basic_q_cnts);
num_counters = size;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
num_counters += size;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
num_counters += size;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
num_counters += size;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl))
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
num_counters += size;
cnts->num_q_counters = num_counters;
RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc7 commit e80ef139488fb4f481c877a81f6ba54f1f0ee416 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Mon Jun 5 13:33:20 2023 +0300 RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 </TASK> Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:45 +00:00
if (is_vport)
goto skip_non_qcounters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
num_counters += ARRAY_SIZE(cong_cnts);
}
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
}
num_op_counters = ARRAY_SIZE(basic_op_cnts);
if (MLX5_CAP_FLOWTABLE(dev->mdev,
ft_field_support_2_nic_receive_rdma.bth_opcode))
num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
if (MLX5_CAP_FLOWTABLE(dev->mdev,
ft_field_support_2_nic_transmit_rdma.bth_opcode))
num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
skip_non_qcounters:
cnts->num_op_counters = num_op_counters;
num_counters += num_op_counters;
cnts->descs = kcalloc(num_counters,
sizeof(struct rdma_stat_desc), GFP_KERNEL);
if (!cnts->descs)
return -ENOMEM;
cnts->offsets = kcalloc(num_counters,
sizeof(*cnts->offsets), GFP_KERNEL);
if (!cnts->offsets)
goto err;
return 0;
err:
kfree(cnts->descs);
cnts->descs = NULL;
return -ENOMEM;
}
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
int num_cnt_ports = dev->num_ports;
int i, j;
if (is_mdev_switchdev_mode(dev->mdev))
num_cnt_ports = min(2, num_cnt_ports);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
for (i = 0; i < num_cnt_ports; i++) {
if (dev->port[i].cnts.set_id) {
MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
dev->port[i].cnts.set_id);
mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}
kfree(dev->port[i].cnts.descs);
kfree(dev->port[i].cnts.offsets);
for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
if (!dev->port[i].cnts.opfcs[j].fc)
continue;
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
mlx5_ib_fs_remove_op_fc(dev,
&dev->port[i].cnts.opfcs[j], j);
mlx5_fc_destroy(dev->mdev,
dev->port[i].cnts.opfcs[j].fc);
dev->port[i].cnts.opfcs[j].fc = NULL;
}
}
}
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
int num_cnt_ports = dev->num_ports;
int err = 0;
int i;
bool is_shared;
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
/*
* In switchdev we need to allocate two ports, one that is used for
* the device Q_counters and it is essentially the real Q_counters of
* this device, while the other is used as a helper for PF to be able to
* query all other vports.
*/
if (is_mdev_switchdev_mode(dev->mdev))
num_cnt_ports = min(2, num_cnt_ports);
for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
if (err)
goto err_alloc;
mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
dev->port[i].cnts.offsets, i);
MLX5_SET(alloc_q_counter_in, in, uid,
is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
if (err) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
i + 1, err);
goto err_alloc;
}
dev->port[i].cnts.set_id =
MLX5_GET(alloc_q_counter_out, out, counter_set_id);
}
return 0;
err_alloc:
mlx5_ib_dealloc_counters(dev);
return err;
}
static int read_flow_counters(struct ib_device *ibdev,
struct mlx5_read_counters_attr *read_attr)
{
struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
return mlx5_fc_query(dev->mdev, fc,
&read_attr->out[IB_COUNTER_PACKETS],
&read_attr->out[IB_COUNTER_BYTES]);
}
/* flow counters currently expose two counters packets and bytes */
#define FLOW_COUNTERS_NUM 2
static int counters_set_description(
struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
u32 cntrs_max_index = 0;
int i;
if (counters_type != MLX5_IB_COUNTERS_FLOW)
return -EINVAL;
/* init the fields for the object */
mcounters->type = counters_type;
mcounters->read_counters = read_flow_counters;
mcounters->counters_num = FLOW_COUNTERS_NUM;
mcounters->ncounters = ncounters;
/* each counter entry have both description and index pair */
for (i = 0; i < ncounters; i++) {
if (desc_data[i].description > IB_COUNTER_BYTES)
return -EINVAL;
if (cntrs_max_index <= desc_data[i].index)
cntrs_max_index = desc_data[i].index + 1;
}
mutex_lock(&mcounters->mcntrs_mutex);
mcounters->counters_data = desc_data;
mcounters->cntrs_max_index = cntrs_max_index;
mutex_unlock(&mcounters->mcntrs_mutex);
return 0;
}
#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
struct mlx5_ib_create_flow *ucmd)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
struct mlx5_ib_flow_counters_desc *desc_data = NULL;
bool hw_hndl = false;
int ret = 0;
if (ucmd && ucmd->ncounters_data != 0) {
cntrs_data = ucmd->data;
if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
return -EINVAL;
desc_data = kcalloc(cntrs_data->ncounters,
sizeof(*desc_data),
GFP_KERNEL);
if (!desc_data)
return -ENOMEM;
if (copy_from_user(desc_data,
u64_to_user_ptr(cntrs_data->counters_data),
sizeof(*desc_data) * cntrs_data->ncounters)) {
ret = -EFAULT;
goto free;
}
}
if (!mcounters->hw_cntrs_hndl) {
mcounters->hw_cntrs_hndl = mlx5_fc_create(
to_mdev(ibcounters->device)->mdev, false);
if (IS_ERR(mcounters->hw_cntrs_hndl)) {
ret = PTR_ERR(mcounters->hw_cntrs_hndl);
goto free;
}
hw_hndl = true;
}
if (desc_data) {
/* counters already bound to at least one flow */
if (mcounters->cntrs_max_index) {
ret = -EINVAL;
goto free_hndl;
}
ret = counters_set_description(ibcounters,
MLX5_IB_COUNTERS_FLOW,
desc_data,
cntrs_data->ncounters);
if (ret)
goto free_hndl;
} else if (!mcounters->cntrs_max_index) {
/* counters not bound yet, must have udata passed */
ret = -EINVAL;
goto free_hndl;
}
return 0;
free_hndl:
if (hw_hndl) {
mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
mcounters->hw_cntrs_hndl);
mcounters->hw_cntrs_hndl = NULL;
}
free:
kfree(desc_data);
return ret;
}
void mlx5_ib_counters_clear_description(struct ib_counters *counters)
{
struct mlx5_ib_mcounters *mcounters;
if (!counters || atomic_read(&counters->usecnt) != 1)
return;
mcounters = to_mcounters(counters);
mutex_lock(&mcounters->mcntrs_mutex);
kfree(mcounters->counters_data);
mcounters->counters_data = NULL;
mcounters->cntrs_max_index = 0;
mutex_unlock(&mcounters->mcntrs_mutex);
}
static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
unsigned int index, bool enable)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_ib_counters *cnts;
struct mlx5_ib_op_fc *opfc;
u32 num_hw_counters, type;
int ret;
cnts = &dev->port[port - 1].cnts;
num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters;
if (index < num_hw_counters ||
index >= (num_hw_counters + cnts->num_op_counters))
return -EINVAL;
if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
return -EINVAL;
type = *(u32 *)cnts->descs[index].priv;
if (type >= MLX5_IB_OPCOUNTER_MAX)
return -EINVAL;
opfc = &cnts->opfcs[type];
if (enable) {
if (opfc->fc)
return -EEXIST;
opfc->fc = mlx5_fc_create(dev->mdev, false);
if (IS_ERR(opfc->fc))
return PTR_ERR(opfc->fc);
ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
if (ret) {
mlx5_fc_destroy(dev->mdev, opfc->fc);
opfc->fc = NULL;
}
return ret;
}
if (!opfc->fc)
return -EINVAL;
mlx5_ib_fs_remove_op_fc(dev, opfc, type);
mlx5_fc_destroy(dev->mdev, opfc->fc);
opfc->fc = NULL;
return 0;
}
static const struct ib_device_ops hw_stats_ops = {
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
.counter_bind_qp = mlx5_ib_counter_bind_qp,
.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
mlx5_ib_modify_stat : NULL,
};
static const struct ib_device_ops hw_switchdev_vport_op = {
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
};
static const struct ib_device_ops hw_switchdev_stats_ops = {
.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
.counter_bind_qp = mlx5_ib_counter_bind_qp,
.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
};
static const struct ib_device_ops counters_ops = {
.create_counters = mlx5_ib_create_counters,
.destroy_counters = mlx5_ib_destroy_counters,
.read_counters = mlx5_ib_read_counters,
INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};
int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
{
ib_set_device_ops(&dev->ib_dev, &counters_ops);
if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
return 0;
if (is_mdev_switchdev_mode(dev->mdev)) {
ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
if (vport_qcounters_supported(dev))
ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
} else
ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
return mlx5_ib_alloc_counters(dev);
}
void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
return;
mlx5_ib_dealloc_counters(dev);
}