xref: /openbmc/linux/net/netfilter/ipvs/ip_vs_est.c (revision e4d0fe71)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2cb7f6a7bSJulius Volz /*
3cb7f6a7bSJulius Volz  * ip_vs_est.c: simple rate estimator for IPVS
4cb7f6a7bSJulius Volz  *
5cb7f6a7bSJulius Volz  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
6cb7f6a7bSJulius Volz  *
729c2026fSHans Schillstrom  * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
829c2026fSHans Schillstrom  *              Network name space (netns) aware.
929c2026fSHans Schillstrom  *              Global data moved to netns i.e struct netns_ipvs
1029c2026fSHans Schillstrom  *              Affected data: est_list and est_lock.
1129c2026fSHans Schillstrom  *              estimation_timer() runs with timer per netns.
 *              get_stats() does the per-CPU summing.
13cb7f6a7bSJulius Volz  */
149aada7acSHannes Eder 
159aada7acSHannes Eder #define KMSG_COMPONENT "IPVS"
169aada7acSHannes Eder #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
179aada7acSHannes Eder 
18cb7f6a7bSJulius Volz #include <linux/kernel.h>
19cb7f6a7bSJulius Volz #include <linux/jiffies.h>
20cb7f6a7bSJulius Volz #include <linux/types.h>
21cb7f6a7bSJulius Volz #include <linux/interrupt.h>
22cb7f6a7bSJulius Volz #include <linux/sysctl.h>
23cb7f6a7bSJulius Volz #include <linux/list.h>
24cb7f6a7bSJulius Volz 
25cb7f6a7bSJulius Volz #include <net/ip_vs.h>
26cb7f6a7bSJulius Volz 
27cb7f6a7bSJulius Volz /*
28cb7f6a7bSJulius Volz   This code is to estimate rate in a shorter interval (such as 8
29cb7f6a7bSJulius Volz   seconds) for virtual services and real servers. For measure rate in a
30cb7f6a7bSJulius Volz   long interval, it is easy to implement a user level daemon which
31cb7f6a7bSJulius Volz   periodically reads those statistical counters and measure rate.
32cb7f6a7bSJulius Volz 
33cb7f6a7bSJulius Volz   We measure rate during the last 8 seconds every 2 seconds:
34cb7f6a7bSJulius Volz 
35cb7f6a7bSJulius Volz     avgrate = avgrate*(1-W) + rate*W
36cb7f6a7bSJulius Volz 
37cb7f6a7bSJulius Volz     where W = 2^(-2)
38cb7f6a7bSJulius Volz 
39cb7f6a7bSJulius Volz   NOTES.
40cb7f6a7bSJulius Volz 
41cd67cd5eSJulian Anastasov   * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
42cb7f6a7bSJulius Volz 
43cd67cd5eSJulian Anastasov   * Netlink users can see 64-bit values but sockopt users are restricted
44cd67cd5eSJulian Anastasov     to 32-bit values for conns, packets, bps, cps and pps.
45cd67cd5eSJulian Anastasov 
46cd67cd5eSJulian Anastasov   * A lot of code is taken from net/core/gen_estimator.c
47705dd344SJulian Anastasov 
48705dd344SJulian Anastasov   KEY POINTS:
49705dd344SJulian Anastasov   - cpustats counters are updated per-cpu in SoftIRQ context with BH disabled
50705dd344SJulian Anastasov   - kthreads read the cpustats to update the estimators (svcs, dests, total)
51705dd344SJulian Anastasov   - the states of estimators can be read (get stats) or modified (zero stats)
52705dd344SJulian Anastasov     from processes
53705dd344SJulian Anastasov 
54705dd344SJulian Anastasov   KTHREADS:
55705dd344SJulian Anastasov   - estimators are added initially to est_temp_list and later kthread 0
56705dd344SJulian Anastasov     distributes them to one or many kthreads for estimation
57705dd344SJulian Anastasov   - kthread contexts are created and attached to array
58705dd344SJulian Anastasov   - the kthread tasks are started when first service is added, before that
59705dd344SJulian Anastasov     the total stats are not estimated
60f0be83d5SJulian Anastasov   - when configuration (cpulist/nice) is changed, the tasks are restarted
61f0be83d5SJulian Anastasov     by work (est_reload_work)
62f0be83d5SJulian Anastasov   - kthread tasks are stopped while the cpulist is empty
63705dd344SJulian Anastasov   - the kthread context holds lists with estimators (chains) which are
64705dd344SJulian Anastasov     processed every 2 seconds
65705dd344SJulian Anastasov   - as estimators can be added dynamically and in bursts, we try to spread
66705dd344SJulian Anastasov     them to multiple chains which are estimated at different time
67705dd344SJulian Anastasov   - on start, kthread 0 enters calculation phase to determine the chain limits
68705dd344SJulian Anastasov     and the limit of estimators per kthread
69705dd344SJulian Anastasov   - est_add_ktid: ktid where to add new ests, can point to empty slot where
70705dd344SJulian Anastasov     we should add kt data
71cb7f6a7bSJulius Volz  */
72cb7f6a7bSJulius Volz 
73705dd344SJulian Anastasov static struct lock_class_key __ipvs_est_key;
74cb7f6a7bSJulius Volz 
75705dd344SJulian Anastasov static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs);
76705dd344SJulian Anastasov static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs);
77b17fc996SHans Schillstrom 
/* Walk one chain of estimators and refresh their rate estimates.
 *
 * Runs from kthread context every 2 seconds.  For each estimator the
 * per-CPU counters are summed into a consistent snapshot (the u64_stats
 * seqcount protects readers on 32-bit), stored in kstats and then folded
 * into the moving averages with W = 2^(-2):
 *
 *	avg += (rate - avg) >> 2
 *
 * cps/pps are scaled by 2^10 and bps by 2^5; since the sample period is
 * 2 seconds the raw deltas are shifted by 9 and 4 respectively
 * (2^10 / 2 = 2^9, 2^5 / 2 = 2^4).
 */
static void ip_vs_chain_estimation(struct hlist_head *chain)
{
	struct ip_vs_estimator *e;
	struct ip_vs_cpu_stats *c;
	struct ip_vs_stats *s;
	u64 rate;

	hlist_for_each_entry_rcu(e, chain, list) {
		u64 conns, inpkts, outpkts, inbytes, outbytes;
		u64 kconns = 0, kinpkts = 0, koutpkts = 0;
		u64 kinbytes = 0, koutbytes = 0;
		unsigned int start;
		int i;

		/* Abandon the walk early when the kthread is being stopped */
		if (kthread_should_stop())
			break;

		s = container_of(e, struct ip_vs_stats, est);
		/* Fold all per-CPU counters under the u64_stats seqcount */
		for_each_possible_cpu(i) {
			c = per_cpu_ptr(s->cpustats, i);
			do {
				start = u64_stats_fetch_begin(&c->syncp);
				conns = u64_stats_read(&c->cnt.conns);
				inpkts = u64_stats_read(&c->cnt.inpkts);
				outpkts = u64_stats_read(&c->cnt.outpkts);
				inbytes = u64_stats_read(&c->cnt.inbytes);
				outbytes = u64_stats_read(&c->cnt.outbytes);
			} while (u64_stats_fetch_retry(&c->syncp, start));
			kconns += conns;
			kinpkts += inpkts;
			koutpkts += outpkts;
			kinbytes += inbytes;
			koutbytes += outbytes;
		}

		/* s->lock serializes with readers/zeroing of the estimates */
		spin_lock(&s->lock);

		s->kstats.conns = kconns;
		s->kstats.inpkts = kinpkts;
		s->kstats.outpkts = koutpkts;
		s->kstats.inbytes = kinbytes;
		s->kstats.outbytes = koutbytes;

		/* scaled by 2^10, but divided 2 seconds */
		rate = (s->kstats.conns - e->last_conns) << 9;
		e->last_conns = s->kstats.conns;
		e->cps += ((s64)rate - (s64)e->cps) >> 2;

		rate = (s->kstats.inpkts - e->last_inpkts) << 9;
		e->last_inpkts = s->kstats.inpkts;
		e->inpps += ((s64)rate - (s64)e->inpps) >> 2;

		rate = (s->kstats.outpkts - e->last_outpkts) << 9;
		e->last_outpkts = s->kstats.outpkts;
		e->outpps += ((s64)rate - (s64)e->outpps) >> 2;

		/* scaled by 2^5, but divided 2 seconds */
		rate = (s->kstats.inbytes - e->last_inbytes) << 4;
		e->last_inbytes = s->kstats.inbytes;
		e->inbps += ((s64)rate - (s64)e->inbps) >> 2;

		rate = (s->kstats.outbytes - e->last_outbytes) << 4;
		e->last_outbytes = s->kstats.outbytes;
		e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
		spin_unlock(&s->lock);
	}
}
145cb7f6a7bSJulius Volz 
/* Estimate all chains attached to one tick (row) of a kthread.
 *
 * The tick data and its chains are RCU-protected.  cond_resched_rcu()
 * drops the RCU read lock between chains, so td must be re-fetched after
 * it: the tick data may have been freed while we rescheduled.
 */
static void ip_vs_tick_estimation(struct ip_vs_est_kt_data *kd, int row)
{
	struct ip_vs_est_tick_data *td;
	int cid;

	rcu_read_lock();
	td = rcu_dereference(kd->ticks[row]);
	if (!td)
		goto out;
	/* Visit only chains that hold estimators (present bitmap) */
	for_each_set_bit(cid, td->present, IPVS_EST_TICK_CHAINS) {
		if (kthread_should_stop())
			break;
		ip_vs_chain_estimation(&td->chains[cid]);
		cond_resched_rcu();
		/* Re-read td: it can be replaced/freed during the resched */
		td = rcu_dereference(kd->ticks[row]);
		if (!td)
			break;
	}

out:
	rcu_read_unlock();
}
168705dd344SJulian Anastasov 
/* Main loop of an estimation kthread.
 *
 * Each kthread wakes every IPVS_EST_TICK jiffies and processes one tick
 * (row) of its estimator table, completing a full pass over all
 * IPVS_EST_NTICKS rows every 2 seconds.  kthread 0 additionally runs the
 * calc phase (to determine est_chain_max) and distributes estimators
 * queued on est_temp_list to the kthreads.
 */
static int ip_vs_estimation_kthread(void *data)
{
	struct ip_vs_est_kt_data *kd = data;
	struct netns_ipvs *ipvs = kd->ipvs;
	int row = kd->est_row;
	unsigned long now;
	int id = kd->id;
	long gap;

	if (id > 0) {
		/* Secondary kthreads run only after limits were calculated */
		if (!ipvs->est_chain_max)
			return 0;
	} else {
		if (!ipvs->est_chain_max) {
			ipvs->est_calc_phase = 1;
			/* commit est_calc_phase before reading est_genid */
			smp_mb();
		}

		/* kthread 0 will handle the calc phase */
		if (ipvs->est_calc_phase)
			ip_vs_est_calc_phase(ipvs);
	}

	while (1) {
		/* kthread 0 registers newly added estimators */
		if (!id && !hlist_empty(&ipvs->est_temp_list))
			ip_vs_est_drain_temp_list(ipvs);
		set_current_state(TASK_IDLE);
		if (kthread_should_stop())
			break;

		/* before estimation, check if we should sleep */
		now = jiffies;
		gap = kd->est_timer - now;
		if (gap > 0) {
			if (gap > IPVS_EST_TICK) {
				/* est_timer ran too far ahead, resync */
				kd->est_timer = now - IPVS_EST_TICK;
				gap = IPVS_EST_TICK;
			}
			schedule_timeout(gap);
		} else {
			__set_current_state(TASK_RUNNING);
			/* Lagging too much (> 8 ticks): stop catching up */
			if (gap < -8 * IPVS_EST_TICK)
				kd->est_timer = now;
		}

		/* Process the current row only if it holds estimators */
		if (kd->tick_len[row])
			ip_vs_tick_estimation(kd, row);

		row++;
		if (row >= IPVS_EST_NTICKS)
			row = 0;
		WRITE_ONCE(kd->est_row, row);
		kd->est_timer += IPVS_EST_TICK;
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}
228705dd344SJulian Anastasov 
/* Schedule stop/start for kthread tasks.
 * Called when kthread configuration (cpulist/nice) changes; the actual
 * restart happens from est_reload_work.
 */
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
{
	/* Ignore reloads before first service is added */
	if (!ipvs->enable)
		return;
	/* Refresh "tasks stopped" state (tasks stop on empty cpulist) */
	ip_vs_est_stopped_recalc(ipvs);
	/* Bump the kthread configuration genid */
	atomic_inc(&ipvs->est_genid);
	queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0);
}
240705dd344SJulian Anastasov 
241705dd344SJulian Anastasov /* Start kthread task with current configuration */
ip_vs_est_kthread_start(struct netns_ipvs * ipvs,struct ip_vs_est_kt_data * kd)242705dd344SJulian Anastasov int ip_vs_est_kthread_start(struct netns_ipvs *ipvs,
243705dd344SJulian Anastasov 			    struct ip_vs_est_kt_data *kd)
244705dd344SJulian Anastasov {
245705dd344SJulian Anastasov 	unsigned long now;
246705dd344SJulian Anastasov 	int ret = 0;
247705dd344SJulian Anastasov 	long gap;
248705dd344SJulian Anastasov 
249705dd344SJulian Anastasov 	lockdep_assert_held(&ipvs->est_mutex);
250705dd344SJulian Anastasov 
251705dd344SJulian Anastasov 	if (kd->task)
252705dd344SJulian Anastasov 		goto out;
253705dd344SJulian Anastasov 	now = jiffies;
254705dd344SJulian Anastasov 	gap = kd->est_timer - now;
255705dd344SJulian Anastasov 	/* Sync est_timer if task is starting later */
256705dd344SJulian Anastasov 	if (abs(gap) > 4 * IPVS_EST_TICK)
257705dd344SJulian Anastasov 		kd->est_timer = now;
258705dd344SJulian Anastasov 	kd->task = kthread_create(ip_vs_estimation_kthread, kd, "ipvs-e:%d:%d",
259705dd344SJulian Anastasov 				  ipvs->gen, kd->id);
260705dd344SJulian Anastasov 	if (IS_ERR(kd->task)) {
261705dd344SJulian Anastasov 		ret = PTR_ERR(kd->task);
262705dd344SJulian Anastasov 		kd->task = NULL;
263705dd344SJulian Anastasov 		goto out;
264705dd344SJulian Anastasov 	}
265705dd344SJulian Anastasov 
266f0be83d5SJulian Anastasov 	set_user_nice(kd->task, sysctl_est_nice(ipvs));
267f0be83d5SJulian Anastasov 	set_cpus_allowed_ptr(kd->task, sysctl_est_cpulist(ipvs));
268f0be83d5SJulian Anastasov 
269705dd344SJulian Anastasov 	pr_info("starting estimator thread %d...\n", kd->id);
270705dd344SJulian Anastasov 	wake_up_process(kd->task);
271705dd344SJulian Anastasov 
272705dd344SJulian Anastasov out:
273705dd344SJulian Anastasov 	return ret;
274705dd344SJulian Anastasov }
275705dd344SJulian Anastasov 
ip_vs_est_kthread_stop(struct ip_vs_est_kt_data * kd)276705dd344SJulian Anastasov void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd)
277705dd344SJulian Anastasov {
278705dd344SJulian Anastasov 	if (kd->task) {
279705dd344SJulian Anastasov 		pr_info("stopping estimator thread %d...\n", kd->id);
280705dd344SJulian Anastasov 		kthread_stop(kd->task);
281705dd344SJulian Anastasov 		kd->task = NULL;
282705dd344SJulian Anastasov 	}
283705dd344SJulian Anastasov }
284705dd344SJulian Anastasov 
285705dd344SJulian Anastasov /* Apply parameters to kthread */
ip_vs_est_set_params(struct netns_ipvs * ipvs,struct ip_vs_est_kt_data * kd)286705dd344SJulian Anastasov static void ip_vs_est_set_params(struct netns_ipvs *ipvs,
287705dd344SJulian Anastasov 				 struct ip_vs_est_kt_data *kd)
288705dd344SJulian Anastasov {
289705dd344SJulian Anastasov 	kd->chain_max = ipvs->est_chain_max;
290705dd344SJulian Anastasov 	/* We are using single chain on RCU preemption */
291705dd344SJulian Anastasov 	if (IPVS_EST_TICK_CHAINS == 1)
292705dd344SJulian Anastasov 		kd->chain_max *= IPVS_EST_CHAIN_FACTOR;
293705dd344SJulian Anastasov 	kd->tick_max = IPVS_EST_TICK_CHAINS * kd->chain_max;
294705dd344SJulian Anastasov 	kd->est_max_count = IPVS_EST_NTICKS * kd->tick_max;
295705dd344SJulian Anastasov }
296705dd344SJulian Anastasov 
/* Create and start estimation kthread in a free or new array slot.
 * Returns 0 on success, -EINVAL when the thread limit is reached,
 * -ENOMEM (or kthread_start error) on failure.
 */
static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
{
	struct ip_vs_est_kt_data *kd = NULL;
	int id = ipvs->est_kt_count;
	int ret = -ENOMEM;
	void *arr = NULL;
	int i;

	/* Refuse to go above est_max_threads once services are present */
	if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
	    ipvs->enable && ipvs->est_max_threads)
		return -EINVAL;

	mutex_lock(&ipvs->est_mutex);

	/* Reuse a free slot if one exists ... */
	for (i = 0; i < id; i++) {
		if (!ipvs->est_kt_arr[i])
			break;
	}
	if (i >= id) {
		/* ... otherwise grow the array by one slot */
		arr = krealloc_array(ipvs->est_kt_arr, id + 1,
				     sizeof(struct ip_vs_est_kt_data *),
				     GFP_KERNEL);
		if (!arr)
			goto out;
		ipvs->est_kt_arr = arr;
	} else {
		id = i;
	}

	kd = kzalloc(sizeof(*kd), GFP_KERNEL);
	if (!kd)
		goto out;
	kd->ipvs = ipvs;
	/* Initially, every tick has room for estimators */
	bitmap_fill(kd->avail, IPVS_EST_NTICKS);
	kd->est_timer = jiffies;
	kd->id = id;
	ip_vs_est_set_params(ipvs, kd);

	/* Pre-allocate stats used in calc phase */
	if (!id && !kd->calc_stats) {
		kd->calc_stats = ip_vs_stats_alloc();
		if (!kd->calc_stats)
			goto out;
	}

	/* Start kthread tasks only when services are present */
	if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
		ret = ip_vs_est_kthread_start(ipvs, kd);
		if (ret < 0)
			goto out;
	}

	/* Publish the context; count grows only when the array grew */
	if (arr)
		ipvs->est_kt_count++;
	ipvs->est_kt_arr[id] = kd;
	kd = NULL;
	/* Use most recent kthread for new ests */
	ipvs->est_add_ktid = id;
	ret = 0;

out:
	mutex_unlock(&ipvs->est_mutex);
	/* Error path: free the partially constructed context */
	if (kd) {
		ip_vs_stats_free(kd->calc_stats);
		kfree(kd);
	}

	return ret;
}
367705dd344SJulian Anastasov 
368705dd344SJulian Anastasov /* Select ktid where to add new ests: available, unused or new slot */
ip_vs_est_update_ktid(struct netns_ipvs * ipvs)369705dd344SJulian Anastasov static void ip_vs_est_update_ktid(struct netns_ipvs *ipvs)
370705dd344SJulian Anastasov {
371705dd344SJulian Anastasov 	int ktid, best = ipvs->est_kt_count;
372705dd344SJulian Anastasov 	struct ip_vs_est_kt_data *kd;
373705dd344SJulian Anastasov 
374705dd344SJulian Anastasov 	for (ktid = 0; ktid < ipvs->est_kt_count; ktid++) {
375705dd344SJulian Anastasov 		kd = ipvs->est_kt_arr[ktid];
376705dd344SJulian Anastasov 		if (kd) {
377705dd344SJulian Anastasov 			if (kd->est_count < kd->est_max_count) {
378705dd344SJulian Anastasov 				best = ktid;
379705dd344SJulian Anastasov 				break;
380705dd344SJulian Anastasov 			}
381705dd344SJulian Anastasov 		} else if (ktid < best) {
382705dd344SJulian Anastasov 			best = ktid;
383705dd344SJulian Anastasov 		}
384705dd344SJulian Anastasov 	}
385705dd344SJulian Anastasov 	ipvs->est_add_ktid = best;
386705dd344SJulian Anastasov }
387705dd344SJulian Anastasov 
/* Add estimator to current kthread (est_add_ktid).
 * Chooses a tick row (honouring est->ktrow as requested delay) and the
 * first non-full chain in it, then records the location in the
 * estimator so ip_vs_stop_estimator() can unlink it later.
 */
static int ip_vs_enqueue_estimator(struct netns_ipvs *ipvs,
				   struct ip_vs_estimator *est)
{
	struct ip_vs_est_kt_data *kd = NULL;
	struct ip_vs_est_tick_data *td;
	int ktid, row, crow, cid, ret;
	int delay = est->ktrow;

	BUILD_BUG_ON_MSG(IPVS_EST_TICK_CHAINS > 127,
			 "Too many chains for ktcid");

	/* Use the currently preferred kthread if it has a context */
	if (ipvs->est_add_ktid < ipvs->est_kt_count) {
		kd = ipvs->est_kt_arr[ipvs->est_add_ktid];
		if (kd)
			goto add_est;
	}

	/* No usable context: create one (may also start a task) */
	ret = ip_vs_est_add_kthread(ipvs);
	if (ret < 0)
		goto out;
	kd = ipvs->est_kt_arr[ipvs->est_add_ktid];

add_est:
	ktid = kd->id;
	/* For small number of estimators prefer to use few ticks,
	 * otherwise try to add into the last estimated row.
	 * est_row and add_row point after the row we should use
	 */
	if (kd->est_count >= 2 * kd->tick_max || delay < IPVS_EST_NTICKS - 1)
		crow = READ_ONCE(kd->est_row);
	else
		crow = kd->add_row;
	crow += delay;
	if (crow >= IPVS_EST_NTICKS)
		crow -= IPVS_EST_NTICKS;
	/* Assume initial delay ? */
	if (delay >= IPVS_EST_NTICKS - 1) {
		/* Preserve initial delay or decrease it if no space in tick */
		row = crow;
		if (crow < IPVS_EST_NTICKS - 1) {
			crow++;
			row = find_last_bit(kd->avail, crow);
		}
		if (row >= crow)
			row = find_last_bit(kd->avail, IPVS_EST_NTICKS);
	} else {
		/* Preserve delay or increase it if no space in tick */
		row = IPVS_EST_NTICKS;
		if (crow > 0)
			row = find_next_bit(kd->avail, IPVS_EST_NTICKS, crow);
		if (row >= IPVS_EST_NTICKS)
			row = find_first_bit(kd->avail, IPVS_EST_NTICKS);
	}

	/* Allocate the tick data on first use of this row */
	td = rcu_dereference_protected(kd->ticks[row], 1);
	if (!td) {
		td = kzalloc(sizeof(*td), GFP_KERNEL);
		if (!td) {
			ret = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(kd->ticks[row], td);
	}

	/* Pick the first chain in this tick that is not yet full */
	cid = find_first_zero_bit(td->full, IPVS_EST_TICK_CHAINS);

	kd->est_count++;
	kd->tick_len[row]++;
	if (!td->chain_len[cid])
		__set_bit(cid, td->present);
	td->chain_len[cid]++;
	/* Remember location for later unlinking */
	est->ktid = ktid;
	est->ktrow = row;
	est->ktcid = cid;
	hlist_add_head_rcu(&est->list, &td->chains[cid]);

	/* Mark chain (and possibly the whole tick) as full */
	if (td->chain_len[cid] >= kd->chain_max) {
		__set_bit(cid, td->full);
		if (kd->tick_len[row] >= kd->tick_max)
			__clear_bit(row, kd->avail);
	}

	/* Update est_add_ktid to point to first available/empty kt slot */
	if (kd->est_count == kd->est_max_count)
		ip_vs_est_update_ktid(ipvs);

	ret = 0;

out:
	return ret;
}
480705dd344SJulian Anastasov 
481705dd344SJulian Anastasov /* Start estimation for stats */
ip_vs_start_estimator(struct netns_ipvs * ipvs,struct ip_vs_stats * stats)482705dd344SJulian Anastasov int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
483cb7f6a7bSJulius Volz {
484cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
485705dd344SJulian Anastasov 	int ret;
486cb7f6a7bSJulius Volz 
487705dd344SJulian Anastasov 	if (!ipvs->est_max_threads && ipvs->enable)
488f0be83d5SJulian Anastasov 		ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
489cb7f6a7bSJulius Volz 
490705dd344SJulian Anastasov 	est->ktid = -1;
491705dd344SJulian Anastasov 	est->ktrow = IPVS_EST_NTICKS - 1;	/* Initial delay */
492705dd344SJulian Anastasov 
493705dd344SJulian Anastasov 	/* We prefer this code to be short, kthread 0 will requeue the
494705dd344SJulian Anastasov 	 * estimator to available chain. If tasks are disabled, we
495705dd344SJulian Anastasov 	 * will not allocate much memory, just for kt 0.
496705dd344SJulian Anastasov 	 */
497705dd344SJulian Anastasov 	ret = 0;
498705dd344SJulian Anastasov 	if (!ipvs->est_kt_count || !ipvs->est_kt_arr[0])
499705dd344SJulian Anastasov 		ret = ip_vs_est_add_kthread(ipvs);
500705dd344SJulian Anastasov 	if (ret >= 0)
501705dd344SJulian Anastasov 		hlist_add_head(&est->list, &ipvs->est_temp_list);
502705dd344SJulian Anastasov 	else
503705dd344SJulian Anastasov 		INIT_HLIST_NODE(&est->list);
504705dd344SJulian Anastasov 	return ret;
505cb7f6a7bSJulius Volz }
506cb7f6a7bSJulius Volz 
ip_vs_est_kthread_destroy(struct ip_vs_est_kt_data * kd)507705dd344SJulian Anastasov static void ip_vs_est_kthread_destroy(struct ip_vs_est_kt_data *kd)
508705dd344SJulian Anastasov {
509705dd344SJulian Anastasov 	if (kd) {
510705dd344SJulian Anastasov 		if (kd->task) {
511705dd344SJulian Anastasov 			pr_info("stop unused estimator thread %d...\n", kd->id);
512705dd344SJulian Anastasov 			kthread_stop(kd->task);
513705dd344SJulian Anastasov 		}
514705dd344SJulian Anastasov 		ip_vs_stats_free(kd->calc_stats);
515705dd344SJulian Anastasov 		kfree(kd);
516705dd344SJulian Anastasov 	}
517705dd344SJulian Anastasov }
518705dd344SJulian Anastasov 
/* Unlink estimator from chain.
 * Updates the chain/tick bookkeeping and tears down kthread contexts
 * that become empty; kt 0 is destroyed last, only when no estimator
 * remains anywhere.
 */
void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
	struct ip_vs_estimator *est = &stats->est;
	struct ip_vs_est_tick_data *td;
	struct ip_vs_est_kt_data *kd;
	int ktid = est->ktid;
	int row = est->ktrow;
	int cid = est->ktcid;

	/* Failed to add to chain ? */
	if (hlist_unhashed(&est->list))
		return;

	/* On return, estimator can be freed, dequeue it now */

	/* In est_temp_list ? */
	if (ktid < 0) {
		hlist_del(&est->list);
		goto end_kt0;
	}

	/* Unlink from its chain and update the bookkeeping bitmaps */
	hlist_del_rcu(&est->list);
	kd = ipvs->est_kt_arr[ktid];
	td = rcu_dereference_protected(kd->ticks[row], 1);
	__clear_bit(cid, td->full);
	td->chain_len[cid]--;
	if (!td->chain_len[cid])
		__clear_bit(cid, td->present);
	kd->tick_len[row]--;
	__set_bit(row, kd->avail);
	if (!kd->tick_len[row]) {
		/* Last estimator in this tick: free td after a grace period */
		RCU_INIT_POINTER(kd->ticks[row], NULL);
		kfree_rcu(td, rcu_head);
	}
	kd->est_count--;
	if (kd->est_count) {
		/* This kt slot can become available just now, prefer it */
		if (ktid < ipvs->est_add_ktid)
			ipvs->est_add_ktid = ktid;
		return;
	}

	/* Context became empty: destroy it (kt 0 is handled below) */
	if (ktid > 0) {
		mutex_lock(&ipvs->est_mutex);
		ip_vs_est_kthread_destroy(kd);
		ipvs->est_kt_arr[ktid] = NULL;
		if (ktid == ipvs->est_kt_count - 1) {
			/* Trim trailing empty slots, always keeping slot 0 */
			ipvs->est_kt_count--;
			while (ipvs->est_kt_count > 1 &&
			       !ipvs->est_kt_arr[ipvs->est_kt_count - 1])
				ipvs->est_kt_count--;
		}
		mutex_unlock(&ipvs->est_mutex);

		/* This slot is now empty, prefer another available kt slot */
		if (ktid == ipvs->est_add_ktid)
			ip_vs_est_update_ktid(ipvs);
	}

end_kt0:
	/* kt 0 is freed after all other kthreads and chains are empty */
	if (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) {
		kd = ipvs->est_kt_arr[0];
		if (!kd || !kd->est_count) {
			mutex_lock(&ipvs->est_mutex);
			if (kd) {
				ip_vs_est_kthread_destroy(kd);
				ipvs->est_kt_arr[0] = NULL;
			}
			ipvs->est_kt_count--;
			mutex_unlock(&ipvs->est_mutex);
			ipvs->est_add_ktid = 0;
		}
	}
}
595705dd344SJulian Anastasov 
/* Register all ests from est_temp_list to kthreads.
 * Estimators are moved in small batches under __ip_vs_mutex, dropping the
 * mutex and rescheduling between batches so that other configuration work
 * is not starved.  Runs from a kthread: kthread_should_stop() aborts the
 * drain early.
 */
static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs)
{
	struct ip_vs_estimator *est;

	while (1) {
		/* Max number of estimators to move per mutex hold */
		int max = 16;

		mutex_lock(&__ip_vs_mutex);

		while (max-- > 0) {
			est = hlist_entry_safe(ipvs->est_temp_list.first,
					       struct ip_vs_estimator, list);
			if (est) {
				if (kthread_should_stop())
					goto unlock;
				hlist_del_init(&est->list);
				if (ip_vs_enqueue_estimator(ipvs, est) >= 0)
					continue;
				/* Enqueue failed: mark the entry as not
				 * assigned to any kthread and return it to
				 * the head of the temp list.
				 */
				est->ktid = -1;
				hlist_add_head(&est->list,
					       &ipvs->est_temp_list);
				/* Abort, some entries will not be estimated
				 * until next attempt
				 */
			}
			/* Temp list drained, or enqueue failed above */
			goto unlock;
		}
		/* Batch done: release the mutex and give others a chance */
		mutex_unlock(&__ip_vs_mutex);
		cond_resched();
	}

unlock:
	mutex_unlock(&__ip_vs_mutex);
}
631705dd344SJulian Anastasov 
/* Calculate limits for all kthreads.
 * Benchmarks ip_vs_chain_estimation() on a chain holding a single
 * estimator (kthread #0's pre-allocated calc_stats) to derive
 * *chain_max: how many estimators per chain fit the goal of 95usec
 * per chain run.  Returns 1 on success, 0 when aborted because ipvs
 * was disabled or the kthread is stopping; *chain_max is set in both
 * cases (default 8 when no measurement succeeded).
 */
static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	struct ip_vs_est_kt_data *kd;
	struct hlist_head chain;
	struct ip_vs_stats *s;
	int cache_factor = 4;	/* extra estimation runs per timed loop unit */
	int i, loops, ntest;
	s32 min_est = 0;	/* best (lowest) measured cost, ns */
	ktime_t t1, t2;
	int max = 8;		/* fallback chain_max */
	int ret = 1;
	s64 diff;
	u64 val;

	INIT_HLIST_HEAD(&chain);
	mutex_lock(&__ip_vs_mutex);
	kd = ipvs->est_kt_arr[0];
	mutex_unlock(&__ip_vs_mutex);
	/* Use kthread #0's calc_stats as the measurement subject */
	s = kd ? kd->calc_stats : NULL;
	if (!s)
		goto out;
	hlist_add_head(&s->est.list, &chain);

	loops = 1;
	/* Get best result from many tests */
	for (ntest = 0; ntest < 12; ntest++) {
		if (!(ntest & 3)) {
			/* Wait for cpufreq frequency transition */
			wait_event_idle_timeout(wq, kthread_should_stop(),
						HZ / 50);
			if (!ipvs->enable || kthread_should_stop())
				goto stop;
		}

		local_bh_disable();
		rcu_read_lock();

		/* Put stats in cache */
		ip_vs_chain_estimation(&chain);

		/* Time loops * cache_factor estimation runs */
		t1 = ktime_get();
		for (i = loops * cache_factor; i > 0; i--)
			ip_vs_chain_estimation(&chain);
		t2 = ktime_get();

		rcu_read_unlock();
		local_bh_enable();

		if (!ipvs->enable || kthread_should_stop())
			goto stop;
		cond_resched();

		diff = ktime_to_ns(ktime_sub(t2, t1));
		if (diff <= 1 * NSEC_PER_USEC) {
			/* Do more loops on low time resolution */
			loops *= 2;
			continue;
		}
		/* Over a second per sample: discard (likely preempted) */
		if (diff >= NSEC_PER_SEC)
			continue;
		/* Cost per loop unit; note the cache_factor runs are
		 * intentionally charged to a single loop unit.
		 */
		val = diff;
		do_div(val, loops);
		if (!min_est || val < min_est) {
			min_est = val;
			/* goal: 95usec per chain */
			val = 95 * NSEC_PER_USEC;
			if (val >= min_est) {
				do_div(val, min_est);
				max = (int)val;
			} else {
				max = 1;
			}
		}
	}

out:
	if (s)
		hlist_del_init(&s->est.list);
	*chain_max = max;
	return ret;

stop:
	ret = 0;
	goto out;
}
719705dd344SJulian Anastasov 
/* Calculate the parameters and apply them in context of kt #0
 * ECP: est_calc_phase
 * ECM: est_chain_max
 * ECP	ECM	Insert Chain	enable	Description
 * ---------------------------------------------------------------------------
 * 0	0	est_temp_list	0	create kt #0 context
 * 0	0	est_temp_list	0->1	service added, start kthread #0 task
 * 0->1	0	est_temp_list	1	kt task #0 started, enters calc phase
 * 1	0	est_temp_list	1	kt #0: determine est_chain_max,
 *					stop tasks, move ests to est_temp_list
 *					and free kd for kthreads 1..last
 * 1->0	0->N	kt chains	1	ests can go to kthreads
 * 0	N	kt chains	1	drain est_temp_list, create new kthread
 *					contexts, start tasks, estimate
 */
static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
{
	/* Snapshot the generation; a concurrent request for a new calc
	 * phase bumps est_genid, which we re-check before clearing
	 * est_calc_phase at the end.
	 */
	int genid = atomic_read(&ipvs->est_genid);
	struct ip_vs_est_tick_data *td;
	struct ip_vs_est_kt_data *kd;
	struct ip_vs_estimator *est;
	struct ip_vs_stats *stats;
	int id, row, cid, delay;
	bool last, last_td;
	int chain_max;
	int step;

	if (!ip_vs_est_calc_limits(ipvs, &chain_max))
		return;

	mutex_lock(&__ip_vs_mutex);

	/* Stop all other tasks, so that we can immediately move the
	 * estimators to est_temp_list without RCU grace period
	 */
	mutex_lock(&ipvs->est_mutex);
	for (id = 1; id < ipvs->est_kt_count; id++) {
		/* netns clean up started, abort */
		if (!ipvs->enable)
			goto unlock2;
		kd = ipvs->est_kt_arr[id];
		if (!kd)
			continue;
		ip_vs_est_kthread_stop(kd);
	}
	mutex_unlock(&ipvs->est_mutex);

	/* Move all estimators to est_temp_list but carefully,
	 * all estimators and kthread data can be released while
	 * we reschedule. Even for kthread 0.
	 */
	step = 0;

	/* Order entries in est_temp_list in ascending delay, so now
	 * walk delay(desc), id(desc), cid(asc)
	 */
	delay = IPVS_EST_NTICKS;

next_delay:
	delay--;
	if (delay < 0)
		goto end_dequeue;

last_kt:
	/* Destroy contexts backwards */
	id = ipvs->est_kt_count;

next_kt:
	if (!ipvs->enable || kthread_should_stop())
		goto unlock;
	id--;
	if (id < 0)
		goto next_delay;
	kd = ipvs->est_kt_arr[id];
	if (!kd)
		goto next_kt;
	/* kt 0 can exist with empty chains */
	if (!id && kd->est_count <= 1)
		goto next_delay;

	/* Map the relative delay to this kthread's tick row (wrap-around) */
	row = kd->est_row + delay;
	if (row >= IPVS_EST_NTICKS)
		row -= IPVS_EST_NTICKS;
	td = rcu_dereference_protected(kd->ticks[row], 1);
	if (!td)
		goto next_kt;

	cid = 0;

walk_chain:
	if (kthread_should_stop())
		goto unlock;
	step++;
	if (!(step & 63)) {
		/* Give chance estimators to be added (to est_temp_list)
		 * and deleted (releasing kthread contexts)
		 */
		mutex_unlock(&__ip_vs_mutex);
		cond_resched();
		mutex_lock(&__ip_vs_mutex);

		/* Current kt released ? */
		if (id >= ipvs->est_kt_count)
			goto last_kt;
		if (kd != ipvs->est_kt_arr[id])
			goto next_kt;
		/* Current td released ? */
		if (td != rcu_dereference_protected(kd->ticks[row], 1))
			goto next_kt;
		/* No fatal changes on the current kd and td */
	}
	est = hlist_entry_safe(td->chains[cid].first, struct ip_vs_estimator,
			       list);
	if (!est) {
		/* Chain empty: advance to the next chain of this tick */
		cid++;
		if (cid >= IPVS_EST_TICK_CHAINS)
			goto next_kt;
		goto walk_chain;
	}
	/* We can cheat and increase est_count to protect kt 0 context
	 * from release but we prefer to keep the last estimator
	 */
	last = kd->est_count <= 1;
	/* Do not free kt #0 data */
	if (!id && last)
		goto next_delay;
	last_td = kd->tick_len[row] <= 1;
	stats = container_of(est, struct ip_vs_stats, est);
	ip_vs_stop_estimator(ipvs, stats);
	/* Tasks are stopped, move without RCU grace period */
	est->ktid = -1;
	/* Remember the relative row so the delay survives re-enqueue */
	est->ktrow = row - kd->est_row;
	if (est->ktrow < 0)
		est->ktrow += IPVS_EST_NTICKS;
	hlist_add_head(&est->list, &ipvs->est_temp_list);
	/* kd freed ? */
	if (last)
		goto next_kt;
	/* td freed ? */
	if (last_td)
		goto next_kt;
	goto walk_chain;

end_dequeue:
	/* All estimators removed while calculating ? */
	if (!ipvs->est_kt_count)
		goto unlock;
	kd = ipvs->est_kt_arr[0];
	if (!kd)
		goto unlock;
	kd->add_row = kd->est_row;
	/* Apply the limit computed by ip_vs_est_calc_limits() */
	ipvs->est_chain_max = chain_max;
	ip_vs_est_set_params(ipvs, kd);

	pr_info("using max %d ests per chain, %d per kthread\n",
		kd->chain_max, kd->est_max_count);

	/* Try to keep tot_stats in kt0, enqueue it early */
	if (ipvs->tot_stats && !hlist_unhashed(&ipvs->tot_stats->s.est.list) &&
	    ipvs->tot_stats->s.est.ktid == -1) {
		hlist_del(&ipvs->tot_stats->s.est.list);
		hlist_add_head(&ipvs->tot_stats->s.est.list,
			       &ipvs->est_temp_list);
	}

	mutex_lock(&ipvs->est_mutex);

	/* We completed the calc phase, new calc phase not requested */
	if (genid == atomic_read(&ipvs->est_genid))
		ipvs->est_calc_phase = 0;

unlock2:
	mutex_unlock(&ipvs->est_mutex);

unlock:
	mutex_unlock(&__ip_vs_mutex);
}
897cb7f6a7bSJulius Volz 
ip_vs_zero_estimator(struct ip_vs_stats * stats)898cb7f6a7bSJulius Volz void ip_vs_zero_estimator(struct ip_vs_stats *stats)
899cb7f6a7bSJulius Volz {
900cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
901cd67cd5eSJulian Anastasov 	struct ip_vs_kstats *k = &stats->kstats;
902cb7f6a7bSJulius Volz 
90355a3d4e1SJulian Anastasov 	/* reset counters, caller must hold the stats->lock lock */
904cd67cd5eSJulian Anastasov 	est->last_inbytes = k->inbytes;
905cd67cd5eSJulian Anastasov 	est->last_outbytes = k->outbytes;
906cd67cd5eSJulian Anastasov 	est->last_conns = k->conns;
907cd67cd5eSJulian Anastasov 	est->last_inpkts = k->inpkts;
908cd67cd5eSJulian Anastasov 	est->last_outpkts = k->outpkts;
909cb7f6a7bSJulius Volz 	est->cps = 0;
910cb7f6a7bSJulius Volz 	est->inpps = 0;
911cb7f6a7bSJulius Volz 	est->outpps = 0;
912cb7f6a7bSJulius Volz 	est->inbps = 0;
913cb7f6a7bSJulius Volz 	est->outbps = 0;
914cb7f6a7bSJulius Volz }
915cb7f6a7bSJulius Volz 
916ea9f22ccSJulian Anastasov /* Get decoded rates */
ip_vs_read_estimator(struct ip_vs_kstats * dst,struct ip_vs_stats * stats)917cd67cd5eSJulian Anastasov void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
918ea9f22ccSJulian Anastasov {
919ea9f22ccSJulian Anastasov 	struct ip_vs_estimator *e = &stats->est;
920ea9f22ccSJulian Anastasov 
921ea9f22ccSJulian Anastasov 	dst->cps = (e->cps + 0x1FF) >> 10;
922ea9f22ccSJulian Anastasov 	dst->inpps = (e->inpps + 0x1FF) >> 10;
923ea9f22ccSJulian Anastasov 	dst->outpps = (e->outpps + 0x1FF) >> 10;
924ea9f22ccSJulian Anastasov 	dst->inbps = (e->inbps + 0xF) >> 5;
925ea9f22ccSJulian Anastasov 	dst->outbps = (e->outbps + 0xF) >> 5;
926ea9f22ccSJulian Anastasov }
927ea9f22ccSJulian Anastasov 
ip_vs_estimator_net_init(struct netns_ipvs * ipvs)928a4dd0360SEric W. Biederman int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
92961b1ab45SHans Schillstrom {
930705dd344SJulian Anastasov 	INIT_HLIST_HEAD(&ipvs->est_temp_list);
931705dd344SJulian Anastasov 	ipvs->est_kt_arr = NULL;
932705dd344SJulian Anastasov 	ipvs->est_max_threads = 0;
933705dd344SJulian Anastasov 	ipvs->est_calc_phase = 0;
934705dd344SJulian Anastasov 	ipvs->est_chain_max = 0;
935705dd344SJulian Anastasov 	ipvs->est_kt_count = 0;
936705dd344SJulian Anastasov 	ipvs->est_add_ktid = 0;
937705dd344SJulian Anastasov 	atomic_set(&ipvs->est_genid, 0);
938705dd344SJulian Anastasov 	atomic_set(&ipvs->est_genid_done, 0);
939705dd344SJulian Anastasov 	__mutex_init(&ipvs->est_mutex, "ipvs->est_mutex", &__ipvs_est_key);
94061b1ab45SHans Schillstrom 	return 0;
94161b1ab45SHans Schillstrom }
94261b1ab45SHans Schillstrom 
ip_vs_estimator_net_cleanup(struct netns_ipvs * ipvs)943a4dd0360SEric W. Biederman void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs)
94429c2026fSHans Schillstrom {
945705dd344SJulian Anastasov 	int i;
946705dd344SJulian Anastasov 
947705dd344SJulian Anastasov 	for (i = 0; i < ipvs->est_kt_count; i++)
948705dd344SJulian Anastasov 		ip_vs_est_kthread_destroy(ipvs->est_kt_arr[i]);
949705dd344SJulian Anastasov 	kfree(ipvs->est_kt_arr);
950705dd344SJulian Anastasov 	mutex_destroy(&ipvs->est_mutex);
95129c2026fSHans Schillstrom }
952