xref: /openbmc/linux/net/core/gen_estimator.c (revision 2f164822)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
 * net/core/gen_estimator.c	Simple rate estimator.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *		Eric Dumazet <edumazet@google.com>
7  *
8  * Changes:
 *              Jamal Hadi Salim - moved it to net/core and reshuffled
10  *              names to make it usable in general net subsystem.
11  */
12 
13 #include <linux/uaccess.h>
14 #include <linux/bitops.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/kernel.h>
18 #include <linux/jiffies.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/netdevice.h>
27 #include <linux/skbuff.h>
28 #include <linux/rtnetlink.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/seqlock.h>
32 #include <net/sock.h>
33 #include <net/gen_stats.h>
34 
35 /* This code is NOT intended to be used for statistics collection,
36  * its purpose is to provide a base for statistical multiplexing
37  * for controlled load service.
38  * If you need only statistics, run a user level daemon which
39  * periodically reads byte counters.
40  */
41 
/* Per-estimator state: where the counters come from, the snapshot taken
 * at the previous timer run, and the running rate averages derived from
 * them.
 */
struct net_rate_estimator {
	/* Counter source handed to gnet_stats_add_basic(). */
	struct gnet_stats_basic_sync	*bstats;
	/* Optional lock taken around the counter fetch; may be NULL. */
	spinlock_t		*stats_lock;
	/* Forwarded unchanged to gnet_stats_add_basic(). */
	bool			running;
	/* Per-cpu counter source handed to gnet_stats_add_basic(). */
	struct gnet_stats_basic_sync __percpu *cpu_bstats;
	/* Shift applied to both the new sample and the old average. */
	u8			ewma_log;
	u8			intvl_log; /* period : (250ms << intvl_log) */

	/* Brackets updates of avpps/avbps so readers can retry. */
	seqcount_t		seq;
	/* Counter values observed at the previous sample. */
	u64			last_packets;
	u64			last_bytes;

	/* Running averages; gen_estimator_read() reports them >> 8. */
	u64			avpps;
	u64			avbps;

	/* Expiry (in jiffies) programmed for the next timer run. */
	unsigned long           next_jiffies;
	struct timer_list       timer;
	/* Used by kfree_rcu() when the estimator is released. */
	struct rcu_head		rcu;
};
61 
62 static void est_fetch_counters(struct net_rate_estimator *e,
63 			       struct gnet_stats_basic_sync *b)
64 {
65 	gnet_stats_basic_sync_init(b);
66 	if (e->stats_lock)
67 		spin_lock(e->stats_lock);
68 
69 	gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
70 
71 	if (e->stats_lock)
72 		spin_unlock(e->stats_lock);
73 
74 }
75 
76 static void est_timer(struct timer_list *t)
77 {
78 	struct net_rate_estimator *est = from_timer(est, t, timer);
79 	struct gnet_stats_basic_sync b;
80 	u64 b_bytes, b_packets;
81 	u64 rate, brate;
82 
83 	est_fetch_counters(est, &b);
84 	b_bytes = u64_stats_read(&b.bytes);
85 	b_packets = u64_stats_read(&b.packets);
86 
87 	brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
88 	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
89 
90 	rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
91 	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
92 
93 	write_seqcount_begin(&est->seq);
94 	est->avbps += brate;
95 	est->avpps += rate;
96 	write_seqcount_end(&est->seq);
97 
98 	est->last_bytes = b_bytes;
99 	est->last_packets = b_packets;
100 
101 	est->next_jiffies += ((HZ/4) << est->intvl_log);
102 
103 	if (unlikely(time_after_eq(jiffies, est->next_jiffies))) {
104 		/* Ouch... timer was delayed. */
105 		est->next_jiffies = jiffies + 1;
106 	}
107 	mod_timer(&est->timer, est->next_jiffies);
108 }
109 
110 /**
111  * gen_new_estimator - create a new rate estimator
112  * @bstats: basic statistics
113  * @cpu_bstats: bstats per cpu
114  * @rate_est: rate estimator statistics
115  * @lock: lock for statistics and control path
116  * @running: true if @bstats represents a running qdisc, thus @bstats'
117  *           internal values might change during basic reads. Only used
118  *           if @bstats_cpu is NULL
119  * @opt: rate estimator configuration TLV
120  *
121  * Creates a new rate estimator with &bstats as source and &rate_est
122  * as destination. A new timer with the interval specified in the
123  * configuration TLV is created. Upon each interval, the latest statistics
124  * will be read from &bstats and the estimated rate will be stored in
125  * &rate_est with the statistics lock grabbed during this period.
126  *
127  * Returns 0 on success or a negative error code.
128  *
129  */
130 int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
131 		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
132 		      struct net_rate_estimator __rcu **rate_est,
133 		      spinlock_t *lock,
134 		      bool running,
135 		      struct nlattr *opt)
136 {
137 	struct gnet_estimator *parm = nla_data(opt);
138 	struct net_rate_estimator *old, *est;
139 	struct gnet_stats_basic_sync b;
140 	int intvl_log;
141 
142 	if (nla_len(opt) < sizeof(*parm))
143 		return -EINVAL;
144 
145 	/* allowed timer periods are :
146 	 * -2 : 250ms,   -1 : 500ms,    0 : 1 sec
147 	 *  1 : 2 sec,    2 : 4 sec,    3 : 8 sec
148 	 */
149 	if (parm->interval < -2 || parm->interval > 3)
150 		return -EINVAL;
151 
152 	if (parm->ewma_log == 0 || parm->ewma_log >= 31)
153 		return -EINVAL;
154 
155 	est = kzalloc(sizeof(*est), GFP_KERNEL);
156 	if (!est)
157 		return -ENOBUFS;
158 
159 	seqcount_init(&est->seq);
160 	intvl_log = parm->interval + 2;
161 	est->bstats = bstats;
162 	est->stats_lock = lock;
163 	est->running  = running;
164 	est->ewma_log = parm->ewma_log;
165 	est->intvl_log = intvl_log;
166 	est->cpu_bstats = cpu_bstats;
167 
168 	if (lock)
169 		local_bh_disable();
170 	est_fetch_counters(est, &b);
171 	if (lock)
172 		local_bh_enable();
173 	est->last_bytes = u64_stats_read(&b.bytes);
174 	est->last_packets = u64_stats_read(&b.packets);
175 
176 	if (lock)
177 		spin_lock_bh(lock);
178 	old = rcu_dereference_protected(*rate_est, 1);
179 	if (old) {
180 		del_timer_sync(&old->timer);
181 		est->avbps = old->avbps;
182 		est->avpps = old->avpps;
183 	}
184 
185 	est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
186 	timer_setup(&est->timer, est_timer, 0);
187 	mod_timer(&est->timer, est->next_jiffies);
188 
189 	rcu_assign_pointer(*rate_est, est);
190 	if (lock)
191 		spin_unlock_bh(lock);
192 	if (old)
193 		kfree_rcu(old, rcu);
194 	return 0;
195 }
196 EXPORT_SYMBOL(gen_new_estimator);
197 
198 /**
199  * gen_kill_estimator - remove a rate estimator
200  * @rate_est: rate estimator
201  *
202  * Removes the rate estimator.
203  *
204  */
205 void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est)
206 {
207 	struct net_rate_estimator *est;
208 
209 	est = xchg((__force struct net_rate_estimator **)rate_est, NULL);
210 	if (est) {
211 		timer_shutdown_sync(&est->timer);
212 		kfree_rcu(est, rcu);
213 	}
214 }
215 EXPORT_SYMBOL(gen_kill_estimator);
216 
217 /**
218  * gen_replace_estimator - replace rate estimator configuration
219  * @bstats: basic statistics
220  * @cpu_bstats: bstats per cpu
221  * @rate_est: rate estimator statistics
222  * @lock: lock for statistics and control path
223  * @running: true if @bstats represents a running qdisc, thus @bstats'
224  *           internal values might change during basic reads. Only used
225  *           if @cpu_bstats is NULL
226  * @opt: rate estimator configuration TLV
227  *
228  * Replaces the configuration of a rate estimator by calling
229  * gen_kill_estimator() and gen_new_estimator().
230  *
231  * Returns 0 on success or a negative error code.
232  */
233 int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
234 			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
235 			  struct net_rate_estimator __rcu **rate_est,
236 			  spinlock_t *lock,
237 			  bool running, struct nlattr *opt)
238 {
239 	return gen_new_estimator(bstats, cpu_bstats, rate_est,
240 				 lock, running, opt);
241 }
242 EXPORT_SYMBOL(gen_replace_estimator);
243 
244 /**
245  * gen_estimator_active - test if estimator is currently in use
246  * @rate_est: rate estimator
247  *
248  * Returns true if estimator is active, and false if not.
249  */
250 bool gen_estimator_active(struct net_rate_estimator __rcu **rate_est)
251 {
252 	return !!rcu_access_pointer(*rate_est);
253 }
254 EXPORT_SYMBOL(gen_estimator_active);
255 
256 bool gen_estimator_read(struct net_rate_estimator __rcu **rate_est,
257 			struct gnet_stats_rate_est64 *sample)
258 {
259 	struct net_rate_estimator *est;
260 	unsigned seq;
261 
262 	rcu_read_lock();
263 	est = rcu_dereference(*rate_est);
264 	if (!est) {
265 		rcu_read_unlock();
266 		return false;
267 	}
268 
269 	do {
270 		seq = read_seqcount_begin(&est->seq);
271 		sample->bps = est->avbps >> 8;
272 		sample->pps = est->avpps >> 8;
273 	} while (read_seqcount_retry(&est->seq, seq));
274 
275 	rcu_read_unlock();
276 	return true;
277 }
278 EXPORT_SYMBOL(gen_estimator_read);
279