ipvs: add sysctl_run_estimation to support disable estimation
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2174312 Upstream Status: net.git commit 2232642ec3fb Conflicts: we backported 174c37627894 ("netfilter: ipvs: make global sysctl readonly in non-init netns") first. Note: A later upstream commit 144361c1949f ("ipvs: run_estimation should control the kthread tasks") changes the run_estimation sysctl to control the kthread tasks, which depends on a lot of other commits, so I will just skip it. commit 2232642ec3fb4aad6ae4da1e109f55a0e7f2d204 Author: Dust Li <dust.li@linux.alibaba.com> Date: Fri Aug 20 13:37:52 2021 +0800 ipvs: add sysctl_run_estimation to support disable estimation estimation_timer will iterate the est_list to do estimation for each ipvs stats. When there are lots of services, the list can be very large. We found that estimation_timer() runs for more than 200ms on a machine with 104 CPUs and 50K services. yunhong-cgl jiang reported the same phenomenon before: https://www.spinics.net/lists/lvs-devel/msg05426.html In some cases (for example a large K8S cluster with many ipvs services), ipvs estimation may not be needed. So add a sysctl knob to allow users to disable this completely. Default is: 1 (enable) Cc: yunhong-cgl jiang <xintian1976@gmail.com> Signed-off-by: Dust Li <dust.li@linux.alibaba.com> Acked-by: Julian Anastasov <ja@ssi.bg> Acked-by: Simon Horman <horms@verge.net.au> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Hangbin Liu <haliu@redhat.com>
This commit is contained in:
parent
c4f8cebfbb
commit
f2822a6a4f
|
@ -299,3 +299,14 @@ sync_version - INTEGER
|
||||||
|
|
||||||
Kernels with this sync_version entry are able to receive messages
|
Kernels with this sync_version entry are able to receive messages
|
||||||
of both version 1 and version 2 of the synchronisation protocol.
|
of both version 1 and version 2 of the synchronisation protocol.
|
||||||
|
|
||||||
|
run_estimation - BOOLEAN
|
||||||
|
0 - disabled
|
||||||
|
not 0 - enabled (default)
|
||||||
|
|
||||||
|
If disabled, the estimation will be stopped, and you can't see
|
||||||
|
any update on speed estimation data.
|
||||||
|
|
||||||
|
You can always re-enable estimation by setting this value to 1.
|
||||||
|
But be careful, the first estimation after re-enabling is not
|
||||||
|
accurate.
|
||||||
|
|
|
@ -931,6 +931,7 @@ struct netns_ipvs {
|
||||||
int sysctl_conn_reuse_mode;
|
int sysctl_conn_reuse_mode;
|
||||||
int sysctl_schedule_icmp;
|
int sysctl_schedule_icmp;
|
||||||
int sysctl_ignore_tunneled;
|
int sysctl_ignore_tunneled;
|
||||||
|
int sysctl_run_estimation;
|
||||||
|
|
||||||
/* ip_vs_lblc */
|
/* ip_vs_lblc */
|
||||||
int sysctl_lblc_expiration;
|
int sysctl_lblc_expiration;
|
||||||
|
@ -1071,6 +1072,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
|
||||||
return ipvs->sysctl_cache_bypass;
|
return ipvs->sysctl_cache_bypass;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
|
||||||
|
{
|
||||||
|
return ipvs->sysctl_run_estimation;
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
|
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
|
||||||
|
@ -1163,6 +1169,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* IPVS core functions
|
/* IPVS core functions
|
||||||
|
|
|
@ -2016,6 +2016,12 @@ static struct ctl_table vs_vars[] = {
|
||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_dointvec,
|
.proc_handler = proc_dointvec,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.procname = "run_estimation",
|
||||||
|
.maxlen = sizeof(int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec,
|
||||||
|
},
|
||||||
#ifdef CONFIG_IP_VS_DEBUG
|
#ifdef CONFIG_IP_VS_DEBUG
|
||||||
{
|
{
|
||||||
.procname = "debug_level",
|
.procname = "debug_level",
|
||||||
|
@ -4089,6 +4095,9 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
|
||||||
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
|
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
|
||||||
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
|
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
|
||||||
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
|
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
|
||||||
|
ipvs->sysctl_run_estimation = 1;
|
||||||
|
tbl[idx++].data = &ipvs->sysctl_run_estimation;
|
||||||
|
|
||||||
#ifdef CONFIG_IP_VS_DEBUG
|
#ifdef CONFIG_IP_VS_DEBUG
|
||||||
/* Global sysctls must be ro in non-init netns */
|
/* Global sysctls must be ro in non-init netns */
|
||||||
if (!net_eq(net, &init_net))
|
if (!net_eq(net, &init_net))
|
||||||
|
|
|
@ -100,6 +100,9 @@ static void estimation_timer(struct timer_list *t)
|
||||||
u64 rate;
|
u64 rate;
|
||||||
struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
|
struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
|
||||||
|
|
||||||
|
if (!sysctl_run_estimation(ipvs))
|
||||||
|
goto skip;
|
||||||
|
|
||||||
spin_lock(&ipvs->est_lock);
|
spin_lock(&ipvs->est_lock);
|
||||||
list_for_each_entry(e, &ipvs->est_list, list) {
|
list_for_each_entry(e, &ipvs->est_list, list) {
|
||||||
s = container_of(e, struct ip_vs_stats, est);
|
s = container_of(e, struct ip_vs_stats, est);
|
||||||
|
@ -131,6 +134,8 @@ static void estimation_timer(struct timer_list *t)
|
||||||
spin_unlock(&s->lock);
|
spin_unlock(&s->lock);
|
||||||
}
|
}
|
||||||
spin_unlock(&ipvs->est_lock);
|
spin_unlock(&ipvs->est_lock);
|
||||||
|
|
||||||
|
skip:
|
||||||
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
|
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue