Centos-kernel-stream-9/tools/testing/selftests/resctrl/mba_test.c

200 lines
4.7 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Memory Bandwidth Allocation (MBA) test
*
* Copyright (C) 2018 Intel Corporation
*
* Authors:
* Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
* Fenghua Yu <fenghua.yu@intel.com>
*/
#include "resctrl.h"
#define RESULT_FILE_NAME "result_mba"
#define NUM_OF_RUNS 5
selftests/resctrl: Reduce failures due to outliers in MBA/MBM tests JIRA: https://issues.redhat.com/browse/RHEL-20790 commit ef43c30858754d99373a63dff33280a9969b49bc Author: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> Date: Mon Oct 2 12:48:13 2023 +0300 selftests/resctrl: Reduce failures due to outliers in MBA/MBM tests The initial value of 5% chosen for the maximum allowed percentage difference between resctrl mbm value and IMC mbm value in commit 06bd03a57f8c ("selftests/resctrl: Fix MBA/MBM results reporting format") was "randomly chosen value" (as admitted by the changelog). When running tests in our lab across a large number platforms, 5% difference upper bound for success seems a bit on the low side for the MBA and MBM tests. Some platforms produce outliers that are slightly above that, typically 6-7%, which leads MBA/MBM test frequently failing. Replace the "randomly chosen value" with a success bound that is based on those measurements across large number of platforms by relaxing the MBA/MBM success bound to 8%. The relaxed bound removes the failures due the frequent outliers. Fixed commit description style error during merge: Shuah Khan <skhan@linuxfoundation.org> Fixes: 06bd03a57f8c ("selftests/resctrl: Fix MBA/MBM results reporting format") Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Cc: <stable@vger.kernel.org> Signed-off-by: Shuah Khan <skhan@linuxfoundation.org> Signed-off-by: David Arcari <darcari@redhat.com>
2024-09-04 19:38:25 +00:00
#define MAX_DIFF_PERCENT 8
#define ALLOCATION_MAX 100
#define ALLOCATION_MIN 10
#define ALLOCATION_STEP 10
static int mba_init(const struct resctrl_val_param *param, int domain_id)
{
int ret;
ret = initialize_mem_bw_imc();
if (ret)
return ret;
initialize_mem_bw_resctrl(param, domain_id);
return 0;
}
/*
* Change schemata percentage from 100 to 10%. Write schemata to specified
* con_mon grp, mon_grp in resctrl FS.
* For each allocation, run 5 times in order to get average values.
*/
static int mba_setup(const struct resctrl_test *test,
const struct user_params *uparams,
struct resctrl_val_param *p)
{
static int runs_per_allocation, allocation = 100;
char allocation_str[64];
int ret;
if (runs_per_allocation >= NUM_OF_RUNS)
runs_per_allocation = 0;
/* Only set up schemata once every NUM_OF_RUNS of allocations */
if (runs_per_allocation++ != 0)
return 0;
if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX)
return END_OF_TESTS;
sprintf(allocation_str, "%d", allocation);
ret = write_schemata(p->ctrlgrp, allocation_str, uparams->cpu, test->resource);
if (ret < 0)
return ret;
allocation -= ALLOCATION_STEP;
return 0;
}
static int mba_measure(const struct user_params *uparams,
struct resctrl_val_param *param, pid_t bm_pid)
{
return measure_mem_bw(uparams, param, bm_pid, "reads");
}
static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
{
int allocation, runs;
bool ret = false;
ksft_print_msg("Results are displayed in (MB)\n");
/* Memory bandwidth from 100% down to 10% */
for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
allocation++) {
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
long avg_bw_imc, avg_bw_resc;
selftests/resctrl: Fix MBA/MBM results reporting format MBM unit test starts fill_buf (default built-in benchmark) in a new con_mon group (c1, m1) and records resctrl reported mbm values and iMC (Integrated Memory Controller) values every second. It does this for five seconds (randomly chosen value) in total. It then calculates average of resctrl_mbm values and imc_mbm values and if the difference is greater than 300 MB/sec (randomly chosen value), the test treats it as a failure. MBA unit test is similar to MBM but after every run it changes schemata. Checking for a difference of 300 MB/sec doesn't look very meaningful when the mbm values are changing over a wide range. For example, below are the values running MBA test on SKL with different allocations 1. With 10% as schemata both iMC and resctrl mbm_values are around 2000 MB/sec 2. With 100% as schemata both iMC and resctrl mbm_values are around 10000 MB/sec A 300 MB/sec difference between resctrl_mbm and imc_mbm values is acceptable at 100% schemata but it isn't acceptable at 10% schemata because that's a huge difference. So, fix this by checking for percentage difference instead of absolute difference i.e. check if the difference between resctrl_mbm value and imc_mbm value is within 5% (randomly chosen value) of imc_mbm value. If the difference is greater than 5% of imc_mbm value, treat it is a failure. Tested-by: Babu Moger <babu.moger@amd.com> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2021-03-17 02:22:48 +00:00
int avg_diff_per;
float avg_diff;
/*
* The first run is discarded due to inaccurate value from
* phase transition.
*/
for (runs = NUM_OF_RUNS * allocation + 1;
runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) {
sum_bw_imc += bw_imc[runs];
sum_bw_resc += bw_resc[runs];
}
avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1);
avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1);
selftests/resctrl: Fix MBA/MBM results reporting format MBM unit test starts fill_buf (default built-in benchmark) in a new con_mon group (c1, m1) and records resctrl reported mbm values and iMC (Integrated Memory Controller) values every second. It does this for five seconds (randomly chosen value) in total. It then calculates average of resctrl_mbm values and imc_mbm values and if the difference is greater than 300 MB/sec (randomly chosen value), the test treats it as a failure. MBA unit test is similar to MBM but after every run it changes schemata. Checking for a difference of 300 MB/sec doesn't look very meaningful when the mbm values are changing over a wide range. For example, below are the values running MBA test on SKL with different allocations 1. With 10% as schemata both iMC and resctrl mbm_values are around 2000 MB/sec 2. With 100% as schemata both iMC and resctrl mbm_values are around 10000 MB/sec A 300 MB/sec difference between resctrl_mbm and imc_mbm values is acceptable at 100% schemata but it isn't acceptable at 10% schemata because that's a huge difference. So, fix this by checking for percentage difference instead of absolute difference i.e. check if the difference between resctrl_mbm value and imc_mbm value is within 5% (randomly chosen value) of imc_mbm value. If the difference is greater than 5% of imc_mbm value, treat it is a failure. Tested-by: Babu Moger <babu.moger@amd.com> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2021-03-17 02:22:48 +00:00
avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc;
avg_diff_per = (int)(avg_diff * 100);
ksft_print_msg("%s Check MBA diff within %d%% for schemata %u\n",
selftests/resctrl: Fix MBA/MBM results reporting format MBM unit test starts fill_buf (default built-in benchmark) in a new con_mon group (c1, m1) and records resctrl reported mbm values and iMC (Integrated Memory Controller) values every second. It does this for five seconds (randomly chosen value) in total. It then calculates average of resctrl_mbm values and imc_mbm values and if the difference is greater than 300 MB/sec (randomly chosen value), the test treats it as a failure. MBA unit test is similar to MBM but after every run it changes schemata. Checking for a difference of 300 MB/sec doesn't look very meaningful when the mbm values are changing over a wide range. For example, below are the values running MBA test on SKL with different allocations 1. With 10% as schemata both iMC and resctrl mbm_values are around 2000 MB/sec 2. With 100% as schemata both iMC and resctrl mbm_values are around 10000 MB/sec A 300 MB/sec difference between resctrl_mbm and imc_mbm values is acceptable at 100% schemata but it isn't acceptable at 10% schemata because that's a huge difference. So, fix this by checking for percentage difference instead of absolute difference i.e. check if the difference between resctrl_mbm value and imc_mbm value is within 5% (randomly chosen value) of imc_mbm value. If the difference is greater than 5% of imc_mbm value, treat it is a failure. Tested-by: Babu Moger <babu.moger@amd.com> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2021-03-17 02:22:48 +00:00
avg_diff_per > MAX_DIFF_PERCENT ?
"Fail:" : "Pass:",
MAX_DIFF_PERCENT,
ALLOCATION_MAX - ALLOCATION_STEP * allocation);
ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
selftests/resctrl: Fix MBA/MBM results reporting format MBM unit test starts fill_buf (default built-in benchmark) in a new con_mon group (c1, m1) and records resctrl reported mbm values and iMC (Integrated Memory Controller) values every second. It does this for five seconds (randomly chosen value) in total. It then calculates average of resctrl_mbm values and imc_mbm values and if the difference is greater than 300 MB/sec (randomly chosen value), the test treats it as a failure. MBA unit test is similar to MBM but after every run it changes schemata. Checking for a difference of 300 MB/sec doesn't look very meaningful when the mbm values are changing over a wide range. For example, below are the values running MBA test on SKL with different allocations 1. With 10% as schemata both iMC and resctrl mbm_values are around 2000 MB/sec 2. With 100% as schemata both iMC and resctrl mbm_values are around 10000 MB/sec A 300 MB/sec difference between resctrl_mbm and imc_mbm values is acceptable at 100% schemata but it isn't acceptable at 10% schemata because that's a huge difference. So, fix this by checking for percentage difference instead of absolute difference i.e. check if the difference between resctrl_mbm value and imc_mbm value is within 5% (randomly chosen value) of imc_mbm value. If the difference is greater than 5% of imc_mbm value, treat it is a failure. Tested-by: Babu Moger <babu.moger@amd.com> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2021-03-17 02:22:48 +00:00
if (avg_diff_per > MAX_DIFF_PERCENT)
ret = true;
}
ksft_print_msg("%s Check schemata change using MBA\n",
ret ? "Fail:" : "Pass:");
if (ret)
ksft_print_msg("At least one test failed\n");
return ret;
}
static int check_results(void)
{
char *token_array[8], output[] = RESULT_FILE_NAME, temp[512];
unsigned long bw_imc[1024], bw_resc[1024];
int runs;
FILE *fp;
fp = fopen(output, "r");
if (!fp) {
ksft_perror(output);
return -1;
}
runs = 0;
while (fgets(temp, sizeof(temp), fp)) {
char *token = strtok(temp, ":\t");
int fields = 0;
while (token) {
token_array[fields++] = token;
token = strtok(NULL, ":\t");
}
/* Field 3 is perf imc value */
bw_imc[runs] = strtoul(token_array[3], NULL, 0);
/* Field 5 is resctrl value */
bw_resc[runs] = strtoul(token_array[5], NULL, 0);
runs++;
}
fclose(fp);
return show_mba_info(bw_imc, bw_resc);
}
static void mba_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
struct resctrl_val_param param = {
.ctrlgrp = "c1",
.filename = RESULT_FILE_NAME,
.init = mba_init,
.setup = mba_setup,
.measure = mba_measure,
};
int ret;
remove(RESULT_FILE_NAME);
ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
return ret;
ret = check_results();
return ret;
}
static bool mba_feature_check(const struct resctrl_test *test)
{
return test_resource_feature_check(test) &&
resctrl_mon_feature_exists("L3_MON", "mbm_local_bytes");
}
struct resctrl_test mba_test = {
.name = "MBA",
.resource = "MB",
.vendor_specific = ARCH_INTEL,
.feature_check = mba_feature_check,
.run_test = mba_run_test,
.cleanup = mba_test_cleanup,
};