xref: /openbmc/linux/net/netfilter/ipvs/ip_vs_est.c (revision 2232642e)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ip_vs_est.c: simple rate estimator for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
 *              Network namespace (netns) aware.
 *              Global data moved to netns, i.e. struct netns_ipvs.
 *              Affected data: est_list and est_lock.
 *              estimation_timer() runs with one timer per netns.
 *              get_stats() does the per-CPU summing.
 */
149aada7acSHannes Eder 
159aada7acSHannes Eder #define KMSG_COMPONENT "IPVS"
169aada7acSHannes Eder #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
179aada7acSHannes Eder 
18cb7f6a7bSJulius Volz #include <linux/kernel.h>
19cb7f6a7bSJulius Volz #include <linux/jiffies.h>
20cb7f6a7bSJulius Volz #include <linux/types.h>
21cb7f6a7bSJulius Volz #include <linux/interrupt.h>
22cb7f6a7bSJulius Volz #include <linux/sysctl.h>
23cb7f6a7bSJulius Volz #include <linux/list.h>
24cb7f6a7bSJulius Volz 
25cb7f6a7bSJulius Volz #include <net/ip_vs.h>
26cb7f6a7bSJulius Volz 
/*
  This code estimates rates over a short interval (such as 8 seconds)
  for virtual services and real servers. To measure rates over a long
  interval, it is easy to implement a user-level daemon which
  periodically reads those statistical counters and computes the rate.

  Currently, the measurement is driven by a slow timer handler. Hopefully
  this measurement will not introduce too much load.

  We measure the rate during the last 8 seconds, every 2 seconds:

    avgrate = avgrate*(1-W) + rate*W

    where W = 2^(-2)

  NOTES.

  * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.

  * Netlink users can see 64-bit values but sockopt users are restricted
    to 32-bit values for conns, packets, bps, cps and pps.

  * A lot of code is taken from net/core/gen_estimator.c
 */
51cb7f6a7bSJulius Volz 
52cb7f6a7bSJulius Volz 
53b17fc996SHans Schillstrom /*
54b17fc996SHans Schillstrom  * Make a summary from each cpu
55b17fc996SHans Schillstrom  */
56cd67cd5eSJulian Anastasov static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
57b962abdcSJulian Anastasov 				 struct ip_vs_cpu_stats __percpu *stats)
58b17fc996SHans Schillstrom {
59b17fc996SHans Schillstrom 	int i;
60d1ee4feaSJulian Anastasov 	bool add = false;
61b17fc996SHans Schillstrom 
62b17fc996SHans Schillstrom 	for_each_possible_cpu(i) {
63b17fc996SHans Schillstrom 		struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
64b17fc996SHans Schillstrom 		unsigned int start;
65cd67cd5eSJulian Anastasov 		u64 conns, inpkts, outpkts, inbytes, outbytes;
66cd67cd5eSJulian Anastasov 
67d1ee4feaSJulian Anastasov 		if (add) {
68b17fc996SHans Schillstrom 			do {
694a569c0cSJulian Anastasov 				start = u64_stats_fetch_begin(&s->syncp);
70cd67cd5eSJulian Anastasov 				conns = s->cnt.conns;
71cd67cd5eSJulian Anastasov 				inpkts = s->cnt.inpkts;
72cd67cd5eSJulian Anastasov 				outpkts = s->cnt.outpkts;
73cd67cd5eSJulian Anastasov 				inbytes = s->cnt.inbytes;
74cd67cd5eSJulian Anastasov 				outbytes = s->cnt.outbytes;
754a569c0cSJulian Anastasov 			} while (u64_stats_fetch_retry(&s->syncp, start));
76cd67cd5eSJulian Anastasov 			sum->conns += conns;
77cd67cd5eSJulian Anastasov 			sum->inpkts += inpkts;
78cd67cd5eSJulian Anastasov 			sum->outpkts += outpkts;
79b17fc996SHans Schillstrom 			sum->inbytes += inbytes;
80b17fc996SHans Schillstrom 			sum->outbytes += outbytes;
81b17fc996SHans Schillstrom 		} else {
82d1ee4feaSJulian Anastasov 			add = true;
83b17fc996SHans Schillstrom 			do {
844a569c0cSJulian Anastasov 				start = u64_stats_fetch_begin(&s->syncp);
85cd67cd5eSJulian Anastasov 				sum->conns = s->cnt.conns;
86cd67cd5eSJulian Anastasov 				sum->inpkts = s->cnt.inpkts;
87cd67cd5eSJulian Anastasov 				sum->outpkts = s->cnt.outpkts;
88cd67cd5eSJulian Anastasov 				sum->inbytes = s->cnt.inbytes;
89cd67cd5eSJulian Anastasov 				sum->outbytes = s->cnt.outbytes;
904a569c0cSJulian Anastasov 			} while (u64_stats_fetch_retry(&s->syncp, start));
91b17fc996SHans Schillstrom 		}
92b17fc996SHans Schillstrom 	}
93b17fc996SHans Schillstrom }
94b17fc996SHans Schillstrom 
95b17fc996SHans Schillstrom 
968ef81c65SKees Cook static void estimation_timer(struct timer_list *t)
97cb7f6a7bSJulius Volz {
98cb7f6a7bSJulius Volz 	struct ip_vs_estimator *e;
99cb7f6a7bSJulius Volz 	struct ip_vs_stats *s;
100cd67cd5eSJulian Anastasov 	u64 rate;
1018ef81c65SKees Cook 	struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
102cb7f6a7bSJulius Volz 
103*2232642eSDust Li 	if (!sysctl_run_estimation(ipvs))
104*2232642eSDust Li 		goto skip;
105*2232642eSDust Li 
10629c2026fSHans Schillstrom 	spin_lock(&ipvs->est_lock);
10729c2026fSHans Schillstrom 	list_for_each_entry(e, &ipvs->est_list, list) {
108cb7f6a7bSJulius Volz 		s = container_of(e, struct ip_vs_stats, est);
109cb7f6a7bSJulius Volz 
110cb7f6a7bSJulius Volz 		spin_lock(&s->lock);
111cd67cd5eSJulian Anastasov 		ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
112cb7f6a7bSJulius Volz 
113cb7f6a7bSJulius Volz 		/* scaled by 2^10, but divided 2 seconds */
114cd67cd5eSJulian Anastasov 		rate = (s->kstats.conns - e->last_conns) << 9;
115cd67cd5eSJulian Anastasov 		e->last_conns = s->kstats.conns;
116cd67cd5eSJulian Anastasov 		e->cps += ((s64)rate - (s64)e->cps) >> 2;
117cb7f6a7bSJulius Volz 
118cd67cd5eSJulian Anastasov 		rate = (s->kstats.inpkts - e->last_inpkts) << 9;
119cd67cd5eSJulian Anastasov 		e->last_inpkts = s->kstats.inpkts;
120cd67cd5eSJulian Anastasov 		e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
121cb7f6a7bSJulius Volz 
122cd67cd5eSJulian Anastasov 		rate = (s->kstats.outpkts - e->last_outpkts) << 9;
123cd67cd5eSJulian Anastasov 		e->last_outpkts = s->kstats.outpkts;
124cd67cd5eSJulian Anastasov 		e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
125cb7f6a7bSJulius Volz 
126cd67cd5eSJulian Anastasov 		/* scaled by 2^5, but divided 2 seconds */
127cd67cd5eSJulian Anastasov 		rate = (s->kstats.inbytes - e->last_inbytes) << 4;
128cd67cd5eSJulian Anastasov 		e->last_inbytes = s->kstats.inbytes;
129cd67cd5eSJulian Anastasov 		e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
130cb7f6a7bSJulius Volz 
131cd67cd5eSJulian Anastasov 		rate = (s->kstats.outbytes - e->last_outbytes) << 4;
132cd67cd5eSJulian Anastasov 		e->last_outbytes = s->kstats.outbytes;
133cd67cd5eSJulian Anastasov 		e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
134cb7f6a7bSJulius Volz 		spin_unlock(&s->lock);
135cb7f6a7bSJulius Volz 	}
13629c2026fSHans Schillstrom 	spin_unlock(&ipvs->est_lock);
137*2232642eSDust Li 
138*2232642eSDust Li skip:
13929c2026fSHans Schillstrom 	mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
140cb7f6a7bSJulius Volz }
141cb7f6a7bSJulius Volz 
1420f34d54bSEric W. Biederman void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
143cb7f6a7bSJulius Volz {
144cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
145cb7f6a7bSJulius Volz 
146cb7f6a7bSJulius Volz 	INIT_LIST_HEAD(&est->list);
147cb7f6a7bSJulius Volz 
14829c2026fSHans Schillstrom 	spin_lock_bh(&ipvs->est_lock);
14929c2026fSHans Schillstrom 	list_add(&est->list, &ipvs->est_list);
15029c2026fSHans Schillstrom 	spin_unlock_bh(&ipvs->est_lock);
151cb7f6a7bSJulius Volz }
152cb7f6a7bSJulius Volz 
1530f34d54bSEric W. Biederman void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
154cb7f6a7bSJulius Volz {
155cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
156cb7f6a7bSJulius Volz 
15729c2026fSHans Schillstrom 	spin_lock_bh(&ipvs->est_lock);
158cb7f6a7bSJulius Volz 	list_del(&est->list);
15929c2026fSHans Schillstrom 	spin_unlock_bh(&ipvs->est_lock);
160cb7f6a7bSJulius Volz }
161cb7f6a7bSJulius Volz 
162cb7f6a7bSJulius Volz void ip_vs_zero_estimator(struct ip_vs_stats *stats)
163cb7f6a7bSJulius Volz {
164cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
165cd67cd5eSJulian Anastasov 	struct ip_vs_kstats *k = &stats->kstats;
166cb7f6a7bSJulius Volz 
16755a3d4e1SJulian Anastasov 	/* reset counters, caller must hold the stats->lock lock */
168cd67cd5eSJulian Anastasov 	est->last_inbytes = k->inbytes;
169cd67cd5eSJulian Anastasov 	est->last_outbytes = k->outbytes;
170cd67cd5eSJulian Anastasov 	est->last_conns = k->conns;
171cd67cd5eSJulian Anastasov 	est->last_inpkts = k->inpkts;
172cd67cd5eSJulian Anastasov 	est->last_outpkts = k->outpkts;
173cb7f6a7bSJulius Volz 	est->cps = 0;
174cb7f6a7bSJulius Volz 	est->inpps = 0;
175cb7f6a7bSJulius Volz 	est->outpps = 0;
176cb7f6a7bSJulius Volz 	est->inbps = 0;
177cb7f6a7bSJulius Volz 	est->outbps = 0;
178cb7f6a7bSJulius Volz }
179cb7f6a7bSJulius Volz 
180ea9f22ccSJulian Anastasov /* Get decoded rates */
181cd67cd5eSJulian Anastasov void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
182ea9f22ccSJulian Anastasov {
183ea9f22ccSJulian Anastasov 	struct ip_vs_estimator *e = &stats->est;
184ea9f22ccSJulian Anastasov 
185ea9f22ccSJulian Anastasov 	dst->cps = (e->cps + 0x1FF) >> 10;
186ea9f22ccSJulian Anastasov 	dst->inpps = (e->inpps + 0x1FF) >> 10;
187ea9f22ccSJulian Anastasov 	dst->outpps = (e->outpps + 0x1FF) >> 10;
188ea9f22ccSJulian Anastasov 	dst->inbps = (e->inbps + 0xF) >> 5;
189ea9f22ccSJulian Anastasov 	dst->outbps = (e->outbps + 0xF) >> 5;
190ea9f22ccSJulian Anastasov }
191ea9f22ccSJulian Anastasov 
192a4dd0360SEric W. Biederman int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
19361b1ab45SHans Schillstrom {
19429c2026fSHans Schillstrom 	INIT_LIST_HEAD(&ipvs->est_list);
19529c2026fSHans Schillstrom 	spin_lock_init(&ipvs->est_lock);
1968ef81c65SKees Cook 	timer_setup(&ipvs->est_timer, estimation_timer, 0);
19729c2026fSHans Schillstrom 	mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
19861b1ab45SHans Schillstrom 	return 0;
19961b1ab45SHans Schillstrom }
20061b1ab45SHans Schillstrom 
201a4dd0360SEric W. Biederman void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs)
20229c2026fSHans Schillstrom {
203a4dd0360SEric W. Biederman 	del_timer_sync(&ipvs->est_timer);
20429c2026fSHans Schillstrom }
205