xref: /openbmc/linux/net/netfilter/ipvs/ip_vs_est.c (revision 0f34d54b)
1cb7f6a7bSJulius Volz /*
2cb7f6a7bSJulius Volz  * ip_vs_est.c: simple rate estimator for IPVS
3cb7f6a7bSJulius Volz  *
4cb7f6a7bSJulius Volz  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5cb7f6a7bSJulius Volz  *
6cb7f6a7bSJulius Volz  *              This program is free software; you can redistribute it and/or
7cb7f6a7bSJulius Volz  *              modify it under the terms of the GNU General Public License
8cb7f6a7bSJulius Volz  *              as published by the Free Software Foundation; either version
9cb7f6a7bSJulius Volz  *              2 of the License, or (at your option) any later version.
10cb7f6a7bSJulius Volz  *
1129c2026fSHans Schillstrom  * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
1229c2026fSHans Schillstrom  *              Network name space (netns) aware.
1329c2026fSHans Schillstrom  *              Global data moved to netns i.e struct netns_ipvs
1429c2026fSHans Schillstrom  *              Affected data: est_list and est_lock.
1529c2026fSHans Schillstrom  *              estimation_timer() runs with timer per netns.
1629c2026fSHans Schillstrom  *              get_stats() does the per-CPU summing.
17cb7f6a7bSJulius Volz  */
189aada7acSHannes Eder 
199aada7acSHannes Eder #define KMSG_COMPONENT "IPVS"
209aada7acSHannes Eder #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
219aada7acSHannes Eder 
22cb7f6a7bSJulius Volz #include <linux/kernel.h>
23cb7f6a7bSJulius Volz #include <linux/jiffies.h>
24cb7f6a7bSJulius Volz #include <linux/types.h>
25cb7f6a7bSJulius Volz #include <linux/interrupt.h>
26cb7f6a7bSJulius Volz #include <linux/sysctl.h>
27cb7f6a7bSJulius Volz #include <linux/list.h>
28cb7f6a7bSJulius Volz 
29cb7f6a7bSJulius Volz #include <net/ip_vs.h>
30cb7f6a7bSJulius Volz 
31cb7f6a7bSJulius Volz /*
32cb7f6a7bSJulius Volz   This code estimates rates over a short interval (such as 8
33cb7f6a7bSJulius Volz   seconds) for virtual services and real servers. To measure rates over a
34cb7f6a7bSJulius Volz   long interval, it is easy to implement a user-level daemon which
35cb7f6a7bSJulius Volz   periodically reads those statistical counters and computes the rate.
36cb7f6a7bSJulius Volz 
37cb7f6a7bSJulius Volz   Currently, the measurement is driven by a slow timer handler. Hopefully
38cb7f6a7bSJulius Volz   this measurement will not introduce too much load.
39cb7f6a7bSJulius Volz 
40cb7f6a7bSJulius Volz   We measure rate during the last 8 seconds every 2 seconds:
41cb7f6a7bSJulius Volz 
42cb7f6a7bSJulius Volz     avgrate = avgrate*(1-W) + rate*W
43cb7f6a7bSJulius Volz 
44cb7f6a7bSJulius Volz     where W = 2^(-2)
45cb7f6a7bSJulius Volz 
46cb7f6a7bSJulius Volz   NOTES.
47cb7f6a7bSJulius Volz 
48cd67cd5eSJulian Anastasov   * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
49cb7f6a7bSJulius Volz 
50cd67cd5eSJulian Anastasov   * Netlink users can see 64-bit values but sockopt users are restricted
51cd67cd5eSJulian Anastasov     to 32-bit values for conns, packets, bps, cps and pps.
52cd67cd5eSJulian Anastasov 
53cd67cd5eSJulian Anastasov   * A lot of code is taken from net/core/gen_estimator.c
54cb7f6a7bSJulius Volz  */
55cb7f6a7bSJulius Volz 
56cb7f6a7bSJulius Volz 
57b17fc996SHans Schillstrom /*
58b17fc996SHans Schillstrom  * Make a summary from each cpu
59b17fc996SHans Schillstrom  */
60cd67cd5eSJulian Anastasov static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
61b962abdcSJulian Anastasov 				 struct ip_vs_cpu_stats __percpu *stats)
62b17fc996SHans Schillstrom {
63b17fc996SHans Schillstrom 	int i;
64d1ee4feaSJulian Anastasov 	bool add = false;
65b17fc996SHans Schillstrom 
66b17fc996SHans Schillstrom 	for_each_possible_cpu(i) {
67b17fc996SHans Schillstrom 		struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
68b17fc996SHans Schillstrom 		unsigned int start;
69cd67cd5eSJulian Anastasov 		u64 conns, inpkts, outpkts, inbytes, outbytes;
70cd67cd5eSJulian Anastasov 
71d1ee4feaSJulian Anastasov 		if (add) {
72b17fc996SHans Schillstrom 			do {
734a569c0cSJulian Anastasov 				start = u64_stats_fetch_begin(&s->syncp);
74cd67cd5eSJulian Anastasov 				conns = s->cnt.conns;
75cd67cd5eSJulian Anastasov 				inpkts = s->cnt.inpkts;
76cd67cd5eSJulian Anastasov 				outpkts = s->cnt.outpkts;
77cd67cd5eSJulian Anastasov 				inbytes = s->cnt.inbytes;
78cd67cd5eSJulian Anastasov 				outbytes = s->cnt.outbytes;
794a569c0cSJulian Anastasov 			} while (u64_stats_fetch_retry(&s->syncp, start));
80cd67cd5eSJulian Anastasov 			sum->conns += conns;
81cd67cd5eSJulian Anastasov 			sum->inpkts += inpkts;
82cd67cd5eSJulian Anastasov 			sum->outpkts += outpkts;
83b17fc996SHans Schillstrom 			sum->inbytes += inbytes;
84b17fc996SHans Schillstrom 			sum->outbytes += outbytes;
85b17fc996SHans Schillstrom 		} else {
86d1ee4feaSJulian Anastasov 			add = true;
87b17fc996SHans Schillstrom 			do {
884a569c0cSJulian Anastasov 				start = u64_stats_fetch_begin(&s->syncp);
89cd67cd5eSJulian Anastasov 				sum->conns = s->cnt.conns;
90cd67cd5eSJulian Anastasov 				sum->inpkts = s->cnt.inpkts;
91cd67cd5eSJulian Anastasov 				sum->outpkts = s->cnt.outpkts;
92cd67cd5eSJulian Anastasov 				sum->inbytes = s->cnt.inbytes;
93cd67cd5eSJulian Anastasov 				sum->outbytes = s->cnt.outbytes;
944a569c0cSJulian Anastasov 			} while (u64_stats_fetch_retry(&s->syncp, start));
95b17fc996SHans Schillstrom 		}
96b17fc996SHans Schillstrom 	}
97b17fc996SHans Schillstrom }
98b17fc996SHans Schillstrom 
99b17fc996SHans Schillstrom 
100cb7f6a7bSJulius Volz static void estimation_timer(unsigned long arg)
101cb7f6a7bSJulius Volz {
102cb7f6a7bSJulius Volz 	struct ip_vs_estimator *e;
103cb7f6a7bSJulius Volz 	struct ip_vs_stats *s;
104cd67cd5eSJulian Anastasov 	u64 rate;
10529c2026fSHans Schillstrom 	struct net *net = (struct net *)arg;
10629c2026fSHans Schillstrom 	struct netns_ipvs *ipvs;
107cb7f6a7bSJulius Volz 
10829c2026fSHans Schillstrom 	ipvs = net_ipvs(net);
10929c2026fSHans Schillstrom 	spin_lock(&ipvs->est_lock);
11029c2026fSHans Schillstrom 	list_for_each_entry(e, &ipvs->est_list, list) {
111cb7f6a7bSJulius Volz 		s = container_of(e, struct ip_vs_stats, est);
112cb7f6a7bSJulius Volz 
113cb7f6a7bSJulius Volz 		spin_lock(&s->lock);
114cd67cd5eSJulian Anastasov 		ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
115cb7f6a7bSJulius Volz 
116cb7f6a7bSJulius Volz 		/* scaled by 2^10, but divided 2 seconds */
117cd67cd5eSJulian Anastasov 		rate = (s->kstats.conns - e->last_conns) << 9;
118cd67cd5eSJulian Anastasov 		e->last_conns = s->kstats.conns;
119cd67cd5eSJulian Anastasov 		e->cps += ((s64)rate - (s64)e->cps) >> 2;
120cb7f6a7bSJulius Volz 
121cd67cd5eSJulian Anastasov 		rate = (s->kstats.inpkts - e->last_inpkts) << 9;
122cd67cd5eSJulian Anastasov 		e->last_inpkts = s->kstats.inpkts;
123cd67cd5eSJulian Anastasov 		e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
124cb7f6a7bSJulius Volz 
125cd67cd5eSJulian Anastasov 		rate = (s->kstats.outpkts - e->last_outpkts) << 9;
126cd67cd5eSJulian Anastasov 		e->last_outpkts = s->kstats.outpkts;
127cd67cd5eSJulian Anastasov 		e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
128cb7f6a7bSJulius Volz 
129cd67cd5eSJulian Anastasov 		/* scaled by 2^5, but divided 2 seconds */
130cd67cd5eSJulian Anastasov 		rate = (s->kstats.inbytes - e->last_inbytes) << 4;
131cd67cd5eSJulian Anastasov 		e->last_inbytes = s->kstats.inbytes;
132cd67cd5eSJulian Anastasov 		e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
133cb7f6a7bSJulius Volz 
134cd67cd5eSJulian Anastasov 		rate = (s->kstats.outbytes - e->last_outbytes) << 4;
135cd67cd5eSJulian Anastasov 		e->last_outbytes = s->kstats.outbytes;
136cd67cd5eSJulian Anastasov 		e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
137cb7f6a7bSJulius Volz 		spin_unlock(&s->lock);
138cb7f6a7bSJulius Volz 	}
13929c2026fSHans Schillstrom 	spin_unlock(&ipvs->est_lock);
14029c2026fSHans Schillstrom 	mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
141cb7f6a7bSJulius Volz }
142cb7f6a7bSJulius Volz 
1430f34d54bSEric W. Biederman void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
144cb7f6a7bSJulius Volz {
145cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
146cb7f6a7bSJulius Volz 
147cb7f6a7bSJulius Volz 	INIT_LIST_HEAD(&est->list);
148cb7f6a7bSJulius Volz 
14929c2026fSHans Schillstrom 	spin_lock_bh(&ipvs->est_lock);
15029c2026fSHans Schillstrom 	list_add(&est->list, &ipvs->est_list);
15129c2026fSHans Schillstrom 	spin_unlock_bh(&ipvs->est_lock);
152cb7f6a7bSJulius Volz }
153cb7f6a7bSJulius Volz 
1540f34d54bSEric W. Biederman void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
155cb7f6a7bSJulius Volz {
156cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
157cb7f6a7bSJulius Volz 
15829c2026fSHans Schillstrom 	spin_lock_bh(&ipvs->est_lock);
159cb7f6a7bSJulius Volz 	list_del(&est->list);
16029c2026fSHans Schillstrom 	spin_unlock_bh(&ipvs->est_lock);
161cb7f6a7bSJulius Volz }
162cb7f6a7bSJulius Volz 
163cb7f6a7bSJulius Volz void ip_vs_zero_estimator(struct ip_vs_stats *stats)
164cb7f6a7bSJulius Volz {
165cb7f6a7bSJulius Volz 	struct ip_vs_estimator *est = &stats->est;
166cd67cd5eSJulian Anastasov 	struct ip_vs_kstats *k = &stats->kstats;
167cb7f6a7bSJulius Volz 
16855a3d4e1SJulian Anastasov 	/* reset counters, caller must hold the stats->lock lock */
169cd67cd5eSJulian Anastasov 	est->last_inbytes = k->inbytes;
170cd67cd5eSJulian Anastasov 	est->last_outbytes = k->outbytes;
171cd67cd5eSJulian Anastasov 	est->last_conns = k->conns;
172cd67cd5eSJulian Anastasov 	est->last_inpkts = k->inpkts;
173cd67cd5eSJulian Anastasov 	est->last_outpkts = k->outpkts;
174cb7f6a7bSJulius Volz 	est->cps = 0;
175cb7f6a7bSJulius Volz 	est->inpps = 0;
176cb7f6a7bSJulius Volz 	est->outpps = 0;
177cb7f6a7bSJulius Volz 	est->inbps = 0;
178cb7f6a7bSJulius Volz 	est->outbps = 0;
179cb7f6a7bSJulius Volz }
180cb7f6a7bSJulius Volz 
181ea9f22ccSJulian Anastasov /* Get decoded rates */
182cd67cd5eSJulian Anastasov void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
183ea9f22ccSJulian Anastasov {
184ea9f22ccSJulian Anastasov 	struct ip_vs_estimator *e = &stats->est;
185ea9f22ccSJulian Anastasov 
186ea9f22ccSJulian Anastasov 	dst->cps = (e->cps + 0x1FF) >> 10;
187ea9f22ccSJulian Anastasov 	dst->inpps = (e->inpps + 0x1FF) >> 10;
188ea9f22ccSJulian Anastasov 	dst->outpps = (e->outpps + 0x1FF) >> 10;
189ea9f22ccSJulian Anastasov 	dst->inbps = (e->inbps + 0xF) >> 5;
190ea9f22ccSJulian Anastasov 	dst->outbps = (e->outbps + 0xF) >> 5;
191ea9f22ccSJulian Anastasov }
192ea9f22ccSJulian Anastasov 
193503cf15aSHans Schillstrom int __net_init ip_vs_estimator_net_init(struct net *net)
19461b1ab45SHans Schillstrom {
19529c2026fSHans Schillstrom 	struct netns_ipvs *ipvs = net_ipvs(net);
19629c2026fSHans Schillstrom 
19729c2026fSHans Schillstrom 	INIT_LIST_HEAD(&ipvs->est_list);
19829c2026fSHans Schillstrom 	spin_lock_init(&ipvs->est_lock);
19929c2026fSHans Schillstrom 	setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
20029c2026fSHans Schillstrom 	mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
20161b1ab45SHans Schillstrom 	return 0;
20261b1ab45SHans Schillstrom }
20361b1ab45SHans Schillstrom 
204503cf15aSHans Schillstrom void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
20529c2026fSHans Schillstrom {
20629c2026fSHans Schillstrom 	del_timer_sync(&net_ipvs(net)->est_timer);
20729c2026fSHans Schillstrom }
208