Centos-kernel-stream-9/lib/dim/net_dim.c

311 lines
7.7 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/dim.h>
ethtool: provide customized dim profile management JIRA: https://issues.redhat.com/browse/RHEL-57750 commit f750dfe825b904164688adeb147950e0e0c4d262 Author: Heng Qi <hengqi@linux.alibaba.com> Date: Fri Jun 21 18:13:51 2024 +0800 ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi <hengqi@linux.alibaba.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: include/linux/netdevice.h - RH_KABI_RESERVEs at the end. Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
2024-09-17 11:04:21 +00:00
#include <linux/rtnetlink.h>
/*
* Net DIM profiles:
* There are different set of profiles for each CQ period mode.
* There are different set of profiles for RX/TX CQs.
* Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
*/
#define NET_DIM_RX_EQE_PROFILES { \
dim: initialize all struct fields JIRA: https://issues.redhat.com/browse/RHEL-57750 commit ee1444b5e1df4155b591d0d9b1e72853a99ea861 Author: Jesse Brandeburg <jesse.brandeburg@intel.com> Date: Fri May 6 18:10:38 2022 -0700 dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
2024-09-17 11:04:20 +00:00
{.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \
}
#define NET_DIM_RX_CQE_PROFILES { \
dim: initialize all struct fields JIRA: https://issues.redhat.com/browse/RHEL-57750 commit ee1444b5e1df4155b591d0d9b1e72853a99ea861 Author: Jesse Brandeburg <jesse.brandeburg@intel.com> Date: Fri May 6 18:10:38 2022 -0700 dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
2024-09-17 11:04:20 +00:00
{.usec = 2, .pkts = 256,}, \
{.usec = 8, .pkts = 128,}, \
{.usec = 16, .pkts = 64,}, \
{.usec = 32, .pkts = 64,}, \
{.usec = 64, .pkts = 64,} \
}
#define NET_DIM_TX_EQE_PROFILES { \
dim: initialize all struct fields JIRA: https://issues.redhat.com/browse/RHEL-57750 commit ee1444b5e1df4155b591d0d9b1e72853a99ea861 Author: Jesse Brandeburg <jesse.brandeburg@intel.com> Date: Fri May 6 18:10:38 2022 -0700 dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
2024-09-17 11:04:20 +00:00
{.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \
}
#define NET_DIM_TX_CQE_PROFILES { \
dim: initialize all struct fields JIRA: https://issues.redhat.com/browse/RHEL-57750 commit ee1444b5e1df4155b591d0d9b1e72853a99ea861 Author: Jesse Brandeburg <jesse.brandeburg@intel.com> Date: Fri May 6 18:10:38 2022 -0700 dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
2024-09-17 11:04:20 +00:00
{.usec = 5, .pkts = 128,}, \
{.usec = 8, .pkts = 64,}, \
{.usec = 16, .pkts = 32,}, \
{.usec = 32, .pkts = 32,}, \
{.usec = 64, .pkts = 32,} \
}
static const struct dim_cq_moder
rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_RX_EQE_PROFILES,
NET_DIM_RX_CQE_PROFILES,
};
static const struct dim_cq_moder
tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_TX_EQE_PROFILES,
NET_DIM_TX_CQE_PROFILES,
};
struct dim_cq_moder
net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_rx_moderation);
struct dim_cq_moder
net_dim_get_def_rx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
struct dim_cq_moder
net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_tx_moderation);
struct dim_cq_moder
net_dim_get_def_tx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
ethtool: provide customized dim profile management JIRA: https://issues.redhat.com/browse/RHEL-57750 commit f750dfe825b904164688adeb147950e0e0c4d262 Author: Heng Qi <hengqi@linux.alibaba.com> Date: Fri Jun 21 18:13:51 2024 +0800 ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi <hengqi@linux.alibaba.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: include/linux/netdevice.h - RH_KABI_RESERVEs at the end. Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
2024-09-17 11:04:21 +00:00
int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
u8 coal_flags, u8 rx_mode, u8 tx_mode,
void (*rx_dim_work)(struct work_struct *work),
void (*tx_dim_work)(struct work_struct *work))
{
struct dim_cq_moder *rxp = NULL, *txp;
struct dim_irq_moder *moder;
int len;
dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL);
if (!dev->irq_moder)
return -ENOMEM;
moder = dev->irq_moder;
len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile);
moder->coal_flags = coal_flags;
moder->profile_flags = profile_flags;
if (profile_flags & DIM_PROFILE_RX) {
moder->rx_dim_work = rx_dim_work;
moder->dim_rx_mode = rx_mode;
rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL);
if (!rxp)
goto free_moder;
rcu_assign_pointer(moder->rx_profile, rxp);
}
if (profile_flags & DIM_PROFILE_TX) {
moder->tx_dim_work = tx_dim_work;
moder->dim_tx_mode = tx_mode;
txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL);
if (!txp)
goto free_rxp;
rcu_assign_pointer(moder->tx_profile, txp);
}
return 0;
free_rxp:
kfree(rxp);
free_moder:
kfree(moder);
return -ENOMEM;
}
EXPORT_SYMBOL(net_dim_init_irq_moder);
/* RTNL lock is held. */
void net_dim_free_irq_moder(struct net_device *dev)
{
struct dim_cq_moder *rxp, *txp;
if (!dev->irq_moder)
return;
rxp = rtnl_dereference(dev->irq_moder->rx_profile);
txp = rtnl_dereference(dev->irq_moder->tx_profile);
rcu_assign_pointer(dev->irq_moder->rx_profile, NULL);
rcu_assign_pointer(dev->irq_moder->tx_profile, NULL);
kfree_rcu(rxp, rcu);
kfree_rcu(txp, rcu);
kfree(dev->irq_moder);
}
EXPORT_SYMBOL(net_dim_free_irq_moder);
static int net_dim_step(struct dim *dim)
{
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
return DIM_TOO_TIRED;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
break;
case DIM_GOING_RIGHT:
if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
return DIM_ON_EDGE;
dim->profile_ix++;
dim->steps_right++;
break;
case DIM_GOING_LEFT:
if (dim->profile_ix == 0)
return DIM_ON_EDGE;
dim->profile_ix--;
dim->steps_left++;
break;
}
dim->tired++;
return DIM_STEPPED;
}
static void net_dim_exit_parking(struct dim *dim)
{
dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
net_dim_step(dim);
}
static int net_dim_stats_compare(struct dim_stats *curr,
struct dim_stats *prev)
{
if (!prev->bpms)
return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->ppms)
return curr->ppms ? DIM_STATS_BETTER :
DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->epms)
return DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
return DIM_STATS_SAME;
}
static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
{
int prev_state = dim->tune_state;
int prev_ix = dim->profile_ix;
int stats_res;
int step_res;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_SAME)
net_dim_exit_parking(dim);
break;
case DIM_PARKING_TIRED:
dim->tired--;
if (!dim->tired)
net_dim_exit_parking(dim);
break;
case DIM_GOING_RIGHT:
case DIM_GOING_LEFT:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_BETTER)
dim_turn(dim);
if (dim_on_top(dim)) {
dim_park_on_top(dim);
break;
}
step_res = net_dim_step(dim);
switch (step_res) {
case DIM_ON_EDGE:
dim_park_on_top(dim);
break;
case DIM_TOO_TIRED:
dim_park_tired(dim);
break;
}
break;
}
if (prev_state != DIM_PARKING_ON_TOP ||
dim->tune_state != DIM_PARKING_ON_TOP)
dim->prev_stats = *curr_stats;
return dim->profile_ix != prev_ix;
}
void net_dim(struct dim *dim, struct dim_sample end_sample)
{
struct dim_stats curr_stats;
u16 nevents;
switch (dim->state) {
case DIM_MEASURE_IN_PROGRESS:
nevents = BIT_GAP(BITS_PER_TYPE(u16),
end_sample.event_ctr,
dim->start_sample.event_ctr);
if (nevents < DIM_NEVENTS)
break;
dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats);
if (net_dim_decision(&curr_stats, dim)) {
dim->state = DIM_APPLY_NEW_PROFILE;
schedule_work(&dim->work);
break;
}
fallthrough;
case DIM_START_MEASURE:
dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
end_sample.byte_ctr, &dim->start_sample);
dim->state = DIM_MEASURE_IN_PROGRESS;
break;
case DIM_APPLY_NEW_PROFILE:
break;
}
}
EXPORT_SYMBOL(net_dim);