xref: /openbmc/linux/net/sched/sch_generic.c (revision 752beb5e)
1 /*
2  * net/sched/sch_generic.c	Generic packet scheduler routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
11  *              - Ingress support
12  */
13 
14 #include <linux/bitops.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/kernel.h>
18 #include <linux/sched.h>
19 #include <linux/string.h>
20 #include <linux/errno.h>
21 #include <linux/netdevice.h>
22 #include <linux/skbuff.h>
23 #include <linux/rtnetlink.h>
24 #include <linux/init.h>
25 #include <linux/rcupdate.h>
26 #include <linux/list.h>
27 #include <linux/slab.h>
28 #include <linux/if_vlan.h>
29 #include <linux/skb_array.h>
30 #include <linux/if_macvlan.h>
31 #include <net/sch_generic.h>
32 #include <net/pkt_sched.h>
33 #include <net/dst.h>
34 #include <trace/events/qdisc.h>
35 #include <trace/events/net.h>
36 #include <net/xfrm.h>
37 
38 /* Qdisc to use by default */
39 const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
40 EXPORT_SYMBOL(default_qdisc_ops);
41 
42 /* Main transmission queue. */
43 
44 /* Modifications to data participating in scheduling must be protected with
45  * qdisc_lock(qdisc) spinlock.
46  *
47  * The idea is the following:
48  * - enqueue, dequeue are serialized via qdisc root lock
49  * - ingress filtering is also serialized via qdisc root lock
50  * - updates to tree and tree walking are only done under the rtnl mutex.
51  */
52 
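/* For lockless (TCQ_F_NOLOCK) qdiscs the root lock is not held on the
 * enqueue/dequeue fast path, so the helpers below take qdisc_lock(q)
 * explicitly whenever they touch the shared gso_skb and skb_bad_txq
 * lists; for locked qdiscs the caller already holds that lock.
 */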
53 static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
54 {
55 	const struct netdev_queue *txq = q->dev_queue;
56 	spinlock_t *lock = NULL;
57 	struct sk_buff *skb;
58 
59 	if (q->flags & TCQ_F_NOLOCK) {
60 		lock = qdisc_lock(q);
61 		spin_lock(lock);
62 	}
63 
64 	skb = skb_peek(&q->skb_bad_txq);
65 	if (skb) {
66 		/* check the reason for requeuing without holding the tx lock first */
67 		txq = skb_get_tx_queue(txq->dev, skb);
68 		if (!netif_xmit_frozen_or_stopped(txq)) {
69 			skb = __skb_dequeue(&q->skb_bad_txq);
70 			if (qdisc_is_percpu_stats(q)) {
71 				qdisc_qstats_cpu_backlog_dec(q, skb);
72 				qdisc_qstats_cpu_qlen_dec(q);
73 			} else {
74 				qdisc_qstats_backlog_dec(q, skb);
75 				q->q.qlen--;
76 			}
77 		} else {
78 			skb = NULL;
79 		}
80 	}
81 
82 	if (lock)
83 		spin_unlock(lock);
84 
85 	return skb;
86 }
87 
88 static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
89 {
90 	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
91 
92 	if (unlikely(skb))
93 		skb = __skb_dequeue_bad_txq(q);
94 
95 	return skb;
96 }
97 
98 static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
99 					     struct sk_buff *skb)
100 {
101 	spinlock_t *lock = NULL;
102 
103 	if (q->flags & TCQ_F_NOLOCK) {
104 		lock = qdisc_lock(q);
105 		spin_lock(lock);
106 	}
107 
108 	__skb_queue_tail(&q->skb_bad_txq, skb);
109 
110 	if (qdisc_is_percpu_stats(q)) {
111 		qdisc_qstats_cpu_backlog_inc(q, skb);
112 		qdisc_qstats_cpu_qlen_inc(q);
113 	} else {
114 		qdisc_qstats_backlog_inc(q, skb);
115 		q->q.qlen++;
116 	}
117 
118 	if (lock)
119 		spin_unlock(lock);
120 }
121 
122 static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
123 {
124 	spinlock_t *lock = NULL;
125 
126 	if (q->flags & TCQ_F_NOLOCK) {
127 		lock = qdisc_lock(q);
128 		spin_lock(lock);
129 	}
130 
131 	while (skb) {
132 		struct sk_buff *next = skb->next;
133 
134 		__skb_queue_tail(&q->gso_skb, skb);
135 
136 		/* it's still part of the queue */
137 		if (qdisc_is_percpu_stats(q)) {
138 			qdisc_qstats_cpu_requeues_inc(q);
139 			qdisc_qstats_cpu_backlog_inc(q, skb);
140 			qdisc_qstats_cpu_qlen_inc(q);
141 		} else {
142 			q->qstats.requeues++;
143 			qdisc_qstats_backlog_inc(q, skb);
144 			q->q.qlen++;
145 		}
146 
147 		skb = next;
148 	}
149 	if (lock)
150 		spin_unlock(lock);
151 	__netif_schedule(q);
152 }
153 
154 static void try_bulk_dequeue_skb(struct Qdisc *q,
155 				 struct sk_buff *skb,
156 				 const struct netdev_queue *txq,
157 				 int *packets)
158 {
159 	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
160 
161 	while (bytelimit > 0) {
162 		struct sk_buff *nskb = q->dequeue(q);
163 
164 		if (!nskb)
165 			break;
166 
167 		bytelimit -= nskb->len; /* covers GSO len */
168 		skb->next = nskb;
169 		skb = nskb;
170 		(*packets)++; /* GSO counts as one pkt */
171 	}
172 	skb_mark_not_on_list(skb);
173 }
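
/* Rough example of the byte budget above: if BQL reports room for ~64KB on
 * the txq and a 1500 byte skb has already been dequeued, the loop keeps
 * pulling packets until their cumulative length exceeds what is left of
 * that budget.  A single large GSO skb can use up the whole budget in one
 * step, yet still counts as only one packet towards the caller's quota.
 */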
174 
175 /* This variant of try_bulk_dequeue_skb() makes sure
176  * all skbs in the chain are for the same txq
177  */
178 static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
179 				      struct sk_buff *skb,
180 				      int *packets)
181 {
182 	int mapping = skb_get_queue_mapping(skb);
183 	struct sk_buff *nskb;
184 	int cnt = 0;
185 
186 	do {
187 		nskb = q->dequeue(q);
188 		if (!nskb)
189 			break;
190 		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
191 			qdisc_enqueue_skb_bad_txq(q, nskb);
192 			break;
193 		}
194 		skb->next = nskb;
195 		skb = nskb;
196 	} while (++cnt < 8);
197 	(*packets) += cnt;
198 	skb_mark_not_on_list(skb);
199 }
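
/* The slow variant above is used when the qdisc may not bulk freely
 * (see qdisc_may_bulk()): dequeued skbs may be destined for different
 * txqs, so an skb whose queue mapping differs from the head of the chain
 * is parked on skb_bad_txq for a later run, and the chain is capped at
 * eight extra dequeues per pass.
 */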
200 
201 /* Note that dequeue_skb() may return an SKB list (via skb->next).
202  * A requeued skb (via q->gso_skb) can also be an SKB list.
203  */
204 static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
205 				   int *packets)
206 {
207 	const struct netdev_queue *txq = q->dev_queue;
208 	struct sk_buff *skb = NULL;
209 
210 	*packets = 1;
211 	if (unlikely(!skb_queue_empty(&q->gso_skb))) {
212 		spinlock_t *lock = NULL;
213 
214 		if (q->flags & TCQ_F_NOLOCK) {
215 			lock = qdisc_lock(q);
216 			spin_lock(lock);
217 		}
218 
219 		skb = skb_peek(&q->gso_skb);
220 
221 		/* skb may be NULL if another CPU pulled gso_skb off in between
222 		 * the empty check and taking the lock.
223 		 */
224 		if (!skb) {
225 			if (lock)
226 				spin_unlock(lock);
227 			goto validate;
228 		}
229 
230 		/* skbs in gso_skb were already validated */
231 		*validate = false;
232 		if (xfrm_offload(skb))
233 			*validate = true;
234 		/* check the reason for requeuing without holding the tx lock first */
235 		txq = skb_get_tx_queue(txq->dev, skb);
236 		if (!netif_xmit_frozen_or_stopped(txq)) {
237 			skb = __skb_dequeue(&q->gso_skb);
238 			if (qdisc_is_percpu_stats(q)) {
239 				qdisc_qstats_cpu_backlog_dec(q, skb);
240 				qdisc_qstats_cpu_qlen_dec(q);
241 			} else {
242 				qdisc_qstats_backlog_dec(q, skb);
243 				q->q.qlen--;
244 			}
245 		} else {
246 			skb = NULL;
247 		}
248 		if (lock)
249 			spin_unlock(lock);
250 		goto trace;
251 	}
252 validate:
253 	*validate = true;
254 
255 	if ((q->flags & TCQ_F_ONETXQUEUE) &&
256 	    netif_xmit_frozen_or_stopped(txq))
257 		return skb;
258 
259 	skb = qdisc_dequeue_skb_bad_txq(q);
260 	if (unlikely(skb))
261 		goto bulk;
262 	skb = q->dequeue(q);
263 	if (skb) {
264 bulk:
265 		if (qdisc_may_bulk(q))
266 			try_bulk_dequeue_skb(q, skb, txq, packets);
267 		else
268 			try_bulk_dequeue_skb_slow(q, skb, packets);
269 	}
270 trace:
271 	trace_qdisc_dequeue(q, txq, *packets, skb);
272 	return skb;
273 }
274 
275 /*
276  * Transmit possibly several skbs, and handle the return status as
277  * required. Owning the qdisc running seqcount guarantees that
278  * only one CPU can execute this function.
279  *
280  * Returns to the caller:
281  *				false  - hardware queue frozen, back off
282  *				true   - feel free to send more pkts
283  */
284 bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
285 		     struct net_device *dev, struct netdev_queue *txq,
286 		     spinlock_t *root_lock, bool validate)
287 {
288 	int ret = NETDEV_TX_BUSY;
289 	bool again = false;
290 
291 	/* And release qdisc */
292 	if (root_lock)
293 		spin_unlock(root_lock);
294 
295 	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
296 	if (validate)
297 		skb = validate_xmit_skb_list(skb, dev, &again);
298 
299 #ifdef CONFIG_XFRM_OFFLOAD
300 	if (unlikely(again)) {
301 		if (root_lock)
302 			spin_lock(root_lock);
303 
304 		dev_requeue_skb(skb, q);
305 		return false;
306 	}
307 #endif
308 
309 	if (likely(skb)) {
310 		HARD_TX_LOCK(dev, txq, smp_processor_id());
311 		if (!netif_xmit_frozen_or_stopped(txq))
312 			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
313 
314 		HARD_TX_UNLOCK(dev, txq);
315 	} else {
316 		if (root_lock)
317 			spin_lock(root_lock);
318 		return true;
319 	}
320 
321 	if (root_lock)
322 		spin_lock(root_lock);
323 
324 	if (!dev_xmit_complete(ret)) {
325 		/* Driver returned NETDEV_TX_BUSY - requeue skb */
326 		if (unlikely(ret != NETDEV_TX_BUSY))
327 			net_warn_ratelimited("BUG %s code %d qlen %d\n",
328 					     dev->name, ret, q->q.qlen);
329 
330 		dev_requeue_skb(skb, q);
331 		return false;
332 	}
333 
334 	return true;
335 }
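
/* Typical usage, as in qdisc_restart() below: keep transmitting while this
 * returns true.  A false return means the driver or queue was busy and the
 * skb list has been requeued via dev_requeue_skb(), which also calls
 * __netif_schedule() so the qdisc gets another run later.
 */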
336 
337 /*
338  * NOTE: Called under qdisc_lock(q) with locally disabled BH.
339  *
340  * running seqcount guarantees only one CPU can process
341  * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
342  * this queue.
343  *
344  *  netif_tx_lock serializes accesses to the device driver.
345  *
346  *  qdisc_lock(q) and netif_tx_lock are mutually exclusive:
347  *  if one is grabbed, the other must be free.
348  *
349  * Note that this procedure can be called by a watchdog timer.
350  *
351  * Returns to the caller:
352  *				false - queue is empty or throttled.
353  *				true  - queue is not empty.
354  *
355  */
356 static inline bool qdisc_restart(struct Qdisc *q, int *packets)
357 {
358 	spinlock_t *root_lock = NULL;
359 	struct netdev_queue *txq;
360 	struct net_device *dev;
361 	struct sk_buff *skb;
362 	bool validate;
363 
364 	/* Dequeue packet */
365 	skb = dequeue_skb(q, &validate, packets);
366 	if (unlikely(!skb))
367 		return false;
368 
369 	if (!(q->flags & TCQ_F_NOLOCK))
370 		root_lock = qdisc_lock(q);
371 
372 	dev = qdisc_dev(q);
373 	txq = skb_get_tx_queue(dev, skb);
374 
375 	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
376 }
377 
378 void __qdisc_run(struct Qdisc *q)
379 {
380 	int quota = dev_tx_weight;
381 	int packets;
382 
383 	while (qdisc_restart(q, &packets)) {
384 		/*
385 		 * Ordered by likelihood of occurrence: postpone processing if
386 		 * 1. we've exceeded the packet quota, or
387 		 * 2. another process needs the CPU.
388 		 */
389 		quota -= packets;
390 		if (quota <= 0 || need_resched()) {
391 			__netif_schedule(q);
392 			break;
393 		}
394 	}
395 }
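
/* The quota is dev_tx_weight, derived from the net.core.dev_weight sysctl
 * (64 by default, scaled by net.core.dev_weight_tx_bias), so a single
 * invocation handles at most roughly that many packets before handing the
 * qdisc back to __netif_schedule(); bulk dequeues can charge several
 * packets against the quota in one qdisc_restart() call.
 */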
396 
397 unsigned long dev_trans_start(struct net_device *dev)
398 {
399 	unsigned long val, res;
400 	unsigned int i;
401 
402 	if (is_vlan_dev(dev))
403 		dev = vlan_dev_real_dev(dev);
404 	else if (netif_is_macvlan(dev))
405 		dev = macvlan_dev_real_dev(dev);
406 	res = netdev_get_tx_queue(dev, 0)->trans_start;
407 	for (i = 1; i < dev->num_tx_queues; i++) {
408 		val = netdev_get_tx_queue(dev, i)->trans_start;
409 		if (val && time_after(val, res))
410 			res = val;
411 	}
412 
413 	return res;
414 }
415 EXPORT_SYMBOL(dev_trans_start);
416 
417 static void dev_watchdog(struct timer_list *t)
418 {
419 	struct net_device *dev = from_timer(dev, t, watchdog_timer);
420 
421 	netif_tx_lock(dev);
422 	if (!qdisc_tx_is_noop(dev)) {
423 		if (netif_device_present(dev) &&
424 		    netif_running(dev) &&
425 		    netif_carrier_ok(dev)) {
426 			int some_queue_timedout = 0;
427 			unsigned int i;
428 			unsigned long trans_start;
429 
430 			for (i = 0; i < dev->num_tx_queues; i++) {
431 				struct netdev_queue *txq;
432 
433 				txq = netdev_get_tx_queue(dev, i);
434 				trans_start = txq->trans_start;
435 				if (netif_xmit_stopped(txq) &&
436 				    time_after(jiffies, (trans_start +
437 							 dev->watchdog_timeo))) {
438 					some_queue_timedout = 1;
439 					txq->trans_timeout++;
440 					break;
441 				}
442 			}
443 
444 			if (some_queue_timedout) {
445 				trace_net_dev_xmit_timeout(dev, i);
446 				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
447 				       dev->name, netdev_drivername(dev), i);
448 				dev->netdev_ops->ndo_tx_timeout(dev);
449 			}
450 			if (!mod_timer(&dev->watchdog_timer,
451 				       round_jiffies(jiffies +
452 						     dev->watchdog_timeo)))
453 				dev_hold(dev);
454 		}
455 	}
456 	netif_tx_unlock(dev);
457 
458 	dev_put(dev);
459 }
460 
461 void __netdev_watchdog_up(struct net_device *dev)
462 {
463 	if (dev->netdev_ops->ndo_tx_timeout) {
464 		if (dev->watchdog_timeo <= 0)
465 			dev->watchdog_timeo = 5*HZ;
466 		if (!mod_timer(&dev->watchdog_timer,
467 			       round_jiffies(jiffies + dev->watchdog_timeo)))
468 			dev_hold(dev);
469 	}
470 }
471 
472 static void dev_watchdog_up(struct net_device *dev)
473 {
474 	__netdev_watchdog_up(dev);
475 }
476 
477 static void dev_watchdog_down(struct net_device *dev)
478 {
479 	netif_tx_lock_bh(dev);
480 	if (del_timer(&dev->watchdog_timer))
481 		dev_put(dev);
482 	netif_tx_unlock_bh(dev);
483 }
484 
485 /**
486  *	netif_carrier_on - set carrier
487  *	@dev: network device
488  *
489  * Device has detected acquisition of carrier.
490  */
491 void netif_carrier_on(struct net_device *dev)
492 {
493 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
494 		if (dev->reg_state == NETREG_UNINITIALIZED)
495 			return;
496 		atomic_inc(&dev->carrier_up_count);
497 		linkwatch_fire_event(dev);
498 		if (netif_running(dev))
499 			__netdev_watchdog_up(dev);
500 	}
501 }
502 EXPORT_SYMBOL(netif_carrier_on);
503 
504 /**
505  *	netif_carrier_off - clear carrier
506  *	@dev: network device
507  *
508  * Device has detected loss of carrier.
509  */
510 void netif_carrier_off(struct net_device *dev)
511 {
512 	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
513 		if (dev->reg_state == NETREG_UNINITIALIZED)
514 			return;
515 		atomic_inc(&dev->carrier_down_count);
516 		linkwatch_fire_event(dev);
517 	}
518 }
519 EXPORT_SYMBOL(netif_carrier_off);
520 
521 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
522    under all circumstances. It is difficult to invent anything faster or
523    cheaper.
524  */
525 
526 static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
527 			struct sk_buff **to_free)
528 {
529 	__qdisc_drop(skb, to_free);
530 	return NET_XMIT_CN;
531 }
532 
533 static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
534 {
535 	return NULL;
536 }
537 
538 struct Qdisc_ops noop_qdisc_ops __read_mostly = {
539 	.id		=	"noop",
540 	.priv_size	=	0,
541 	.enqueue	=	noop_enqueue,
542 	.dequeue	=	noop_dequeue,
543 	.peek		=	noop_dequeue,
544 	.owner		=	THIS_MODULE,
545 };
546 
547 static struct netdev_queue noop_netdev_queue = {
548 	RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
549 	.qdisc_sleeping	=	&noop_qdisc,
550 };
551 
552 struct Qdisc noop_qdisc = {
553 	.enqueue	=	noop_enqueue,
554 	.dequeue	=	noop_dequeue,
555 	.flags		=	TCQ_F_BUILTIN,
556 	.ops		=	&noop_qdisc_ops,
557 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
558 	.dev_queue	=	&noop_netdev_queue,
559 	.running	=	SEQCNT_ZERO(noop_qdisc.running),
560 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
561 	.gso_skb = {
562 		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
563 		.prev = (struct sk_buff *)&noop_qdisc.gso_skb,
564 		.qlen = 0,
565 		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
566 	},
567 	.skb_bad_txq = {
568 		.next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
569 		.prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
570 		.qlen = 0,
571 		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
572 	},
573 };
574 EXPORT_SYMBOL(noop_qdisc);
575 
576 static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
577 			struct netlink_ext_ack *extack)
578 {
579 	/* register_qdisc() assigns a default of noop_enqueue if unset,
580 	 * but __dev_queue_xmit() treats noqueue only as such
581 	 * if this is NULL - so clear it here. */
582 	qdisc->enqueue = NULL;
583 	return 0;
584 }
585 
586 struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
587 	.id		=	"noqueue",
588 	.priv_size	=	0,
589 	.init		=	noqueue_init,
590 	.enqueue	=	noop_enqueue,
591 	.dequeue	=	noop_dequeue,
592 	.peek		=	noop_dequeue,
593 	.owner		=	THIS_MODULE,
594 };
595 
596 static const u8 prio2band[TC_PRIO_MAX + 1] = {
597 	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
598 };
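
/* Example of the mapping above: skb->priority is masked with TC_PRIO_MAX,
 * so TC_PRIO_CONTROL (7) and TC_PRIO_INTERACTIVE (6) land in band 0,
 * TC_PRIO_BESTEFFORT (0) in band 1, and TC_PRIO_BULK (2) in band 2.
 * Lower-numbered bands are dequeued first by pfifo_fast_dequeue().
 */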
599 
600 /* 3-band FIFO queue: old style, but should be a bit faster than
601    the generic prio+fifo combination.
602  */
603 
604 #define PFIFO_FAST_BANDS 3
605 
606 /*
607  * Private data for a pfifo_fast scheduler containing:
608  *	- rings for priority bands
609  */
610 struct pfifo_fast_priv {
611 	struct skb_array q[PFIFO_FAST_BANDS];
612 };
613 
614 static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
615 					  int band)
616 {
617 	return &priv->q[band];
618 }
619 
620 static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
621 			      struct sk_buff **to_free)
622 {
623 	int band = prio2band[skb->priority & TC_PRIO_MAX];
624 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
625 	struct skb_array *q = band2list(priv, band);
626 	unsigned int pkt_len = qdisc_pkt_len(skb);
627 	int err;
628 
629 	err = skb_array_produce(q, skb);
630 
631 	if (unlikely(err))
632 		return qdisc_drop_cpu(skb, qdisc, to_free);
633 
634 	qdisc_update_stats_at_enqueue(qdisc, pkt_len);
635 	return NET_XMIT_SUCCESS;
636 }
637 
638 static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
639 {
640 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
641 	struct sk_buff *skb = NULL;
642 	int band;
643 
644 	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
645 		struct skb_array *q = band2list(priv, band);
646 
647 		if (__skb_array_empty(q))
648 			continue;
649 
650 		skb = __skb_array_consume(q);
651 	}
652 	if (likely(skb)) {
653 		qdisc_update_stats_at_dequeue(qdisc, skb);
654 	} else {
655 		qdisc->empty = true;
656 	}
657 
658 	return skb;
659 }
660 
661 static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
662 {
663 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
664 	struct sk_buff *skb = NULL;
665 	int band;
666 
667 	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
668 		struct skb_array *q = band2list(priv, band);
669 
670 		skb = __skb_array_peek(q);
671 	}
672 
673 	return skb;
674 }
675 
676 static void pfifo_fast_reset(struct Qdisc *qdisc)
677 {
678 	int i, band;
679 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
680 
681 	for (band = 0; band < PFIFO_FAST_BANDS; band++) {
682 		struct skb_array *q = band2list(priv, band);
683 		struct sk_buff *skb;
684 
685 		/* A NULL ring is possible if the destroy path is taken due to a
686 		 * failed skb_array_init() in pfifo_fast_init().
687 		 */
688 		if (!q->ring.queue)
689 			continue;
690 
691 		while ((skb = __skb_array_consume(q)) != NULL)
692 			kfree_skb(skb);
693 	}
694 
695 	for_each_possible_cpu(i) {
696 		struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
697 
698 		q->backlog = 0;
699 		q->qlen = 0;
700 	}
701 }
702 
703 static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
704 {
705 	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
706 
707 	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
708 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
709 		goto nla_put_failure;
710 	return skb->len;
711 
712 nla_put_failure:
713 	return -1;
714 }
715 
716 static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
717 			   struct netlink_ext_ack *extack)
718 {
719 	unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
720 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
721 	int prio;
722 
723 	/* guard against zero length rings */
724 	if (!qlen)
725 		return -EINVAL;
726 
727 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
728 		struct skb_array *q = band2list(priv, prio);
729 		int err;
730 
731 		err = skb_array_init(q, qlen, GFP_KERNEL);
732 		if (err)
733 			return -ENOMEM;
734 	}
735 
736 	/* Can bypass the queue discipline */
737 	qdisc->flags |= TCQ_F_CAN_BYPASS;
738 	return 0;
739 }
740 
741 static void pfifo_fast_destroy(struct Qdisc *sch)
742 {
743 	struct pfifo_fast_priv *priv = qdisc_priv(sch);
744 	int prio;
745 
746 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
747 		struct skb_array *q = band2list(priv, prio);
748 
749 		/* A NULL ring is possible if the destroy path is taken due to a
750 		 * failed skb_array_init() in pfifo_fast_init().
751 		 */
752 		if (!q->ring.queue)
753 			continue;
754 		/* Destroy the ring, but there is no need to kfree_skb() because a
755 		 * call to pfifo_fast_reset() has already done that work.
756 		 */
757 		ptr_ring_cleanup(&q->ring, NULL);
758 	}
759 }
760 
761 static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
762 					  unsigned int new_len)
763 {
764 	struct pfifo_fast_priv *priv = qdisc_priv(sch);
765 	struct skb_array *bands[PFIFO_FAST_BANDS];
766 	int prio;
767 
768 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
769 		struct skb_array *q = band2list(priv, prio);
770 
771 		bands[prio] = q;
772 	}
773 
774 	return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
775 					 GFP_KERNEL);
776 }
777 
778 struct Qdisc_ops pfifo_fast_ops __read_mostly = {
779 	.id		=	"pfifo_fast",
780 	.priv_size	=	sizeof(struct pfifo_fast_priv),
781 	.enqueue	=	pfifo_fast_enqueue,
782 	.dequeue	=	pfifo_fast_dequeue,
783 	.peek		=	pfifo_fast_peek,
784 	.init		=	pfifo_fast_init,
785 	.destroy	=	pfifo_fast_destroy,
786 	.reset		=	pfifo_fast_reset,
787 	.dump		=	pfifo_fast_dump,
788 	.change_tx_queue_len =  pfifo_fast_change_tx_queue_len,
789 	.owner		=	THIS_MODULE,
790 	.static_flags	=	TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
791 };
792 EXPORT_SYMBOL(pfifo_fast_ops);
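
/* pfifo_fast is the built-in default (see default_qdisc_ops at the top of
 * this file).  The static TCQ_F_NOLOCK | TCQ_F_CPUSTATS flags mean every
 * instance runs without the qdisc root lock on the fast path and keeps
 * per-CPU packet/byte and queue statistics.  The default qdisc can
 * normally be changed at runtime via the net.core.default_qdisc sysctl.
 */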
793 
794 static struct lock_class_key qdisc_tx_busylock;
795 static struct lock_class_key qdisc_running_key;
796 
797 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
798 			  const struct Qdisc_ops *ops,
799 			  struct netlink_ext_ack *extack)
800 {
801 	void *p;
802 	struct Qdisc *sch;
803 	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
804 	int err = -ENOBUFS;
805 	struct net_device *dev;
806 
807 	if (!dev_queue) {
808 		NL_SET_ERR_MSG(extack, "No device queue given");
809 		err = -EINVAL;
810 		goto errout;
811 	}
812 
813 	dev = dev_queue->dev;
814 	p = kzalloc_node(size, GFP_KERNEL,
815 			 netdev_queue_numa_node_read(dev_queue));
816 
817 	if (!p)
818 		goto errout;
819 	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
820 	/* if we got non-aligned memory, ask for more and do the alignment ourselves */
821 	if (sch != p) {
822 		kfree(p);
823 		p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
824 				 netdev_queue_numa_node_read(dev_queue));
825 		if (!p)
826 			goto errout;
827 		sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
828 		sch->padded = (char *) sch - (char *) p;
829 	}
830 	__skb_queue_head_init(&sch->gso_skb);
831 	__skb_queue_head_init(&sch->skb_bad_txq);
832 	qdisc_skb_head_init(&sch->q);
833 	spin_lock_init(&sch->q.lock);
834 
835 	if (ops->static_flags & TCQ_F_CPUSTATS) {
836 		sch->cpu_bstats =
837 			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
838 		if (!sch->cpu_bstats)
839 			goto errout1;
840 
841 		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
842 		if (!sch->cpu_qstats) {
843 			free_percpu(sch->cpu_bstats);
844 			goto errout1;
845 		}
846 	}
847 
848 	spin_lock_init(&sch->busylock);
849 	lockdep_set_class(&sch->busylock,
850 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
851 
852 	/* seqlock has the same scope as busylock, for NOLOCK qdiscs */
853 	spin_lock_init(&sch->seqlock);
854 	lockdep_set_class(&sch->seqlock,
855 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
856 
857 	seqcount_init(&sch->running);
858 	lockdep_set_class(&sch->running,
859 			  dev->qdisc_running_key ?: &qdisc_running_key);
860 
861 	sch->ops = ops;
862 	sch->flags = ops->static_flags;
863 	sch->enqueue = ops->enqueue;
864 	sch->dequeue = ops->dequeue;
865 	sch->dev_queue = dev_queue;
866 	sch->empty = true;
867 	dev_hold(dev);
868 	refcount_set(&sch->refcnt, 1);
869 
870 	return sch;
871 errout1:
872 	kfree(p);
873 errout:
874 	return ERR_PTR(err);
875 }
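
/* The alignment dance above keeps struct Qdisc (and hence the private area
 * returned by qdisc_priv()) on a QDISC_ALIGNTO boundary even when the
 * allocator hands back unaligned memory; sch->padded records the offset so
 * that qdisc_free() can pass the original pointer back to kfree().
 */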
876 
877 struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
878 				const struct Qdisc_ops *ops,
879 				unsigned int parentid,
880 				struct netlink_ext_ack *extack)
881 {
882 	struct Qdisc *sch;
883 
884 	if (!try_module_get(ops->owner)) {
885 		NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
886 		return NULL;
887 	}
888 
889 	sch = qdisc_alloc(dev_queue, ops, extack);
890 	if (IS_ERR(sch)) {
891 		module_put(ops->owner);
892 		return NULL;
893 	}
894 	sch->parent = parentid;
895 
896 	if (!ops->init || ops->init(sch, NULL, extack) == 0)
897 		return sch;
898 
899 	qdisc_put(sch);
900 	return NULL;
901 }
902 EXPORT_SYMBOL(qdisc_create_dflt);
903 
904 /* Under qdisc_lock(qdisc) and BH! */
905 
906 void qdisc_reset(struct Qdisc *qdisc)
907 {
908 	const struct Qdisc_ops *ops = qdisc->ops;
909 	struct sk_buff *skb, *tmp;
910 
911 	if (ops->reset)
912 		ops->reset(qdisc);
913 
914 	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
915 		__skb_unlink(skb, &qdisc->gso_skb);
916 		kfree_skb_list(skb);
917 	}
918 
919 	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
920 		__skb_unlink(skb, &qdisc->skb_bad_txq);
921 		kfree_skb_list(skb);
922 	}
923 
924 	qdisc->q.qlen = 0;
925 	qdisc->qstats.backlog = 0;
926 }
927 EXPORT_SYMBOL(qdisc_reset);
928 
929 void qdisc_free(struct Qdisc *qdisc)
930 {
931 	if (qdisc_is_percpu_stats(qdisc)) {
932 		free_percpu(qdisc->cpu_bstats);
933 		free_percpu(qdisc->cpu_qstats);
934 	}
935 
936 	kfree((char *) qdisc - qdisc->padded);
937 }
938 
939 static void qdisc_free_cb(struct rcu_head *head)
940 {
941 	struct Qdisc *q = container_of(head, struct Qdisc, rcu);
942 
943 	qdisc_free(q);
944 }
945 
946 static void qdisc_destroy(struct Qdisc *qdisc)
947 {
948 	const struct Qdisc_ops  *ops = qdisc->ops;
949 	struct sk_buff *skb, *tmp;
950 
951 #ifdef CONFIG_NET_SCHED
952 	qdisc_hash_del(qdisc);
953 
954 	qdisc_put_stab(rtnl_dereference(qdisc->stab));
955 #endif
956 	gen_kill_estimator(&qdisc->rate_est);
957 	if (ops->reset)
958 		ops->reset(qdisc);
959 	if (ops->destroy)
960 		ops->destroy(qdisc);
961 
962 	module_put(ops->owner);
963 	dev_put(qdisc_dev(qdisc));
964 
965 	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
966 		__skb_unlink(skb, &qdisc->gso_skb);
967 		kfree_skb_list(skb);
968 	}
969 
970 	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
971 		__skb_unlink(skb, &qdisc->skb_bad_txq);
972 		kfree_skb_list(skb);
973 	}
974 
975 	call_rcu(&qdisc->rcu, qdisc_free_cb);
976 }
977 
978 void qdisc_put(struct Qdisc *qdisc)
979 {
980 	if (qdisc->flags & TCQ_F_BUILTIN ||
981 	    !refcount_dec_and_test(&qdisc->refcnt))
982 		return;
983 
984 	qdisc_destroy(qdisc);
985 }
986 EXPORT_SYMBOL(qdisc_put);
987 
988 /* Version of qdisc_put() that is called with rtnl mutex unlocked.
989  * Intended as an optimization: this function only takes the rtnl lock if
990  * the qdisc reference counter has reached zero.
991  */
992 
993 void qdisc_put_unlocked(struct Qdisc *qdisc)
994 {
995 	if (qdisc->flags & TCQ_F_BUILTIN ||
996 	    !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
997 		return;
998 
999 	qdisc_destroy(qdisc);
1000 	rtnl_unlock();
1001 }
1002 EXPORT_SYMBOL(qdisc_put_unlocked);
1003 
1004 /* Attach toplevel qdisc to device queue. */
1005 struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
1006 			      struct Qdisc *qdisc)
1007 {
1008 	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
1009 	spinlock_t *root_lock;
1010 
1011 	root_lock = qdisc_lock(oqdisc);
1012 	spin_lock_bh(root_lock);
1013 
1014 	/* ... and graft new one */
1015 	if (qdisc == NULL)
1016 		qdisc = &noop_qdisc;
1017 	dev_queue->qdisc_sleeping = qdisc;
1018 	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
1019 
1020 	spin_unlock_bh(root_lock);
1021 
1022 	return oqdisc;
1023 }
1024 EXPORT_SYMBOL(dev_graft_qdisc);
1025 
1026 static void attach_one_default_qdisc(struct net_device *dev,
1027 				     struct netdev_queue *dev_queue,
1028 				     void *_unused)
1029 {
1030 	struct Qdisc *qdisc;
1031 	const struct Qdisc_ops *ops = default_qdisc_ops;
1032 
1033 	if (dev->priv_flags & IFF_NO_QUEUE)
1034 		ops = &noqueue_qdisc_ops;
1035 
1036 	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
1037 	if (!qdisc) {
1038 		netdev_info(dev, "activation failed\n");
1039 		return;
1040 	}
1041 	if (!netif_is_multiqueue(dev))
1042 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1043 	dev_queue->qdisc_sleeping = qdisc;
1044 }
1045 
1046 static void attach_default_qdiscs(struct net_device *dev)
1047 {
1048 	struct netdev_queue *txq;
1049 	struct Qdisc *qdisc;
1050 
1051 	txq = netdev_get_tx_queue(dev, 0);
1052 
1053 	if (!netif_is_multiqueue(dev) ||
1054 	    dev->priv_flags & IFF_NO_QUEUE) {
1055 		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
1056 		dev->qdisc = txq->qdisc_sleeping;
1057 		qdisc_refcount_inc(dev->qdisc);
1058 	} else {
1059 		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
1060 		if (qdisc) {
1061 			dev->qdisc = qdisc;
1062 			qdisc->ops->attach(qdisc);
1063 		}
1064 	}
1065 #ifdef CONFIG_NET_SCHED
1066 	if (dev->qdisc != &noop_qdisc)
1067 		qdisc_hash_add(dev->qdisc, false);
1068 #endif
1069 }
1070 
1071 static void transition_one_qdisc(struct net_device *dev,
1072 				 struct netdev_queue *dev_queue,
1073 				 void *_need_watchdog)
1074 {
1075 	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
1076 	int *need_watchdog_p = _need_watchdog;
1077 
1078 	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
1079 		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
1080 
1081 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
1082 	if (need_watchdog_p) {
1083 		dev_queue->trans_start = 0;
1084 		*need_watchdog_p = 1;
1085 	}
1086 }
1087 
1088 void dev_activate(struct net_device *dev)
1089 {
1090 	int need_watchdog;
1091 
1092 	/* No queueing discipline is attached to the device;
1093 	 * create a default one for devices which need queueing,
1094 	 * and the noqueue qdisc for virtual interfaces.
1095 	 */
1096 
1097 	if (dev->qdisc == &noop_qdisc)
1098 		attach_default_qdiscs(dev);
1099 
1100 	if (!netif_carrier_ok(dev))
1101 		/* Delay activation until next carrier-on event */
1102 		return;
1103 
1104 	need_watchdog = 0;
1105 	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
1106 	if (dev_ingress_queue(dev))
1107 		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
1108 
1109 	if (need_watchdog) {
1110 		netif_trans_update(dev);
1111 		dev_watchdog_up(dev);
1112 	}
1113 }
1114 EXPORT_SYMBOL(dev_activate);
1115 
1116 static void dev_deactivate_queue(struct net_device *dev,
1117 				 struct netdev_queue *dev_queue,
1118 				 void *_qdisc_default)
1119 {
1120 	struct Qdisc *qdisc_default = _qdisc_default;
1121 	struct Qdisc *qdisc;
1122 
1123 	qdisc = rtnl_dereference(dev_queue->qdisc);
1124 	if (qdisc) {
1125 		bool nolock = qdisc->flags & TCQ_F_NOLOCK;
1126 
1127 		if (nolock)
1128 			spin_lock_bh(&qdisc->seqlock);
1129 		spin_lock_bh(qdisc_lock(qdisc));
1130 
1131 		if (!(qdisc->flags & TCQ_F_BUILTIN))
1132 			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
1133 
1134 		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1135 		qdisc_reset(qdisc);
1136 
1137 		spin_unlock_bh(qdisc_lock(qdisc));
1138 		if (nolock)
1139 			spin_unlock_bh(&qdisc->seqlock);
1140 	}
1141 }
1142 
1143 static bool some_qdisc_is_busy(struct net_device *dev)
1144 {
1145 	unsigned int i;
1146 
1147 	for (i = 0; i < dev->num_tx_queues; i++) {
1148 		struct netdev_queue *dev_queue;
1149 		spinlock_t *root_lock;
1150 		struct Qdisc *q;
1151 		int val;
1152 
1153 		dev_queue = netdev_get_tx_queue(dev, i);
1154 		q = dev_queue->qdisc_sleeping;
1155 
1156 		root_lock = qdisc_lock(q);
1157 		spin_lock_bh(root_lock);
1158 
1159 		val = (qdisc_is_running(q) ||
1160 		       test_bit(__QDISC_STATE_SCHED, &q->state));
1161 
1162 		spin_unlock_bh(root_lock);
1163 
1164 		if (val)
1165 			return true;
1166 	}
1167 	return false;
1168 }
1169 
1170 static void dev_qdisc_reset(struct net_device *dev,
1171 			    struct netdev_queue *dev_queue,
1172 			    void *none)
1173 {
1174 	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1175 
1176 	if (qdisc)
1177 		qdisc_reset(qdisc);
1178 }
1179 
1180 /**
1181  * 	dev_deactivate_many - deactivate transmissions on several devices
1182  * 	@head: list of devices to deactivate
1183  *
1184  *	This function returns only when all outstanding transmissions
1185  *	have completed, unless all devices are in dismantle phase.
1186  */
1187 void dev_deactivate_many(struct list_head *head)
1188 {
1189 	struct net_device *dev;
1190 
1191 	list_for_each_entry(dev, head, close_list) {
1192 		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
1193 					 &noop_qdisc);
1194 		if (dev_ingress_queue(dev))
1195 			dev_deactivate_queue(dev, dev_ingress_queue(dev),
1196 					     &noop_qdisc);
1197 
1198 		dev_watchdog_down(dev);
1199 	}
1200 
1201 	/* Wait for outstanding qdisc-less dev_queue_xmit calls.
1202 	 * This is avoided if all devices are in dismantle phase:
1203 	 * the caller will call synchronize_net() for us.
1204 	 */
1205 	synchronize_net();
1206 
1207 	/* Wait for outstanding qdisc_run calls. */
1208 	list_for_each_entry(dev, head, close_list) {
1209 		while (some_qdisc_is_busy(dev))
1210 			yield();
1211 		/* The new qdisc is assigned at this point so we can safely
1212 		 * unwind stale skb lists and qdisc statistics
1213 		 */
1214 		netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
1215 		if (dev_ingress_queue(dev))
1216 			dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
1217 	}
1218 }
1219 
1220 void dev_deactivate(struct net_device *dev)
1221 {
1222 	LIST_HEAD(single);
1223 
1224 	list_add(&dev->close_list, &single);
1225 	dev_deactivate_many(&single);
1226 	list_del(&single);
1227 }
1228 EXPORT_SYMBOL(dev_deactivate);
1229 
1230 static int qdisc_change_tx_queue_len(struct net_device *dev,
1231 				     struct netdev_queue *dev_queue)
1232 {
1233 	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1234 	const struct Qdisc_ops *ops = qdisc->ops;
1235 
1236 	if (ops->change_tx_queue_len)
1237 		return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
1238 	return 0;
1239 }
1240 
1241 int dev_qdisc_change_tx_queue_len(struct net_device *dev)
1242 {
1243 	bool up = dev->flags & IFF_UP;
1244 	unsigned int i;
1245 	int ret = 0;
1246 
1247 	if (up)
1248 		dev_deactivate(dev);
1249 
1250 	for (i = 0; i < dev->num_tx_queues; i++) {
1251 		ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);
1252 
1253 		/* TODO: revert changes on a partial failure */
1254 		if (ret)
1255 			break;
1256 	}
1257 
1258 	if (up)
1259 		dev_activate(dev);
1260 	return ret;
1261 }
1262 
1263 static void dev_init_scheduler_queue(struct net_device *dev,
1264 				     struct netdev_queue *dev_queue,
1265 				     void *_qdisc)
1266 {
1267 	struct Qdisc *qdisc = _qdisc;
1268 
1269 	rcu_assign_pointer(dev_queue->qdisc, qdisc);
1270 	dev_queue->qdisc_sleeping = qdisc;
1271 }
1272 
1273 void dev_init_scheduler(struct net_device *dev)
1274 {
1275 	dev->qdisc = &noop_qdisc;
1276 	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
1277 	if (dev_ingress_queue(dev))
1278 		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1279 
1280 	timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
1281 }
1282 
1283 static void shutdown_scheduler_queue(struct net_device *dev,
1284 				     struct netdev_queue *dev_queue,
1285 				     void *_qdisc_default)
1286 {
1287 	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1288 	struct Qdisc *qdisc_default = _qdisc_default;
1289 
1290 	if (qdisc) {
1291 		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1292 		dev_queue->qdisc_sleeping = qdisc_default;
1293 
1294 		qdisc_put(qdisc);
1295 	}
1296 }
1297 
1298 void dev_shutdown(struct net_device *dev)
1299 {
1300 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
1301 	if (dev_ingress_queue(dev))
1302 		shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1303 	qdisc_put(dev->qdisc);
1304 	dev->qdisc = &noop_qdisc;
1305 
1306 	WARN_ON(timer_pending(&dev->watchdog_timer));
1307 }
1308 
1309 void psched_ratecfg_precompute(struct psched_ratecfg *r,
1310 			       const struct tc_ratespec *conf,
1311 			       u64 rate64)
1312 {
1313 	memset(r, 0, sizeof(*r));
1314 	r->overhead = conf->overhead;
1315 	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
1316 	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
1317 	r->mult = 1;
1318 	/*
1319 	 * The deal here is to replace a divide by a reciprocal multiply
1320 	 * in the fast path (a reciprocal divide is a multiply and a shift).
1321 	 *
1322 	 * The normal formula would be:
1323 	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
1324 	 *
1325 	 * We compute mult/shift to use instead:
1326 	 *  time_in_ns = (len * mult) >> shift;
1327 	 *
1328 	 * We try to get the highest possible mult value for accuracy,
1329 	 * but have to make sure no overflows will ever happen.
1330 	 */
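	/* Worked example, assuming rate_bytes_ps = 125000000 (1 Gbit/s):
	 * the loop below doubles factor until mult reaches 2^31, ending with
	 * mult = 2147483648 and shift = 28, so
	 *  time_in_ns = (len * 2^31) >> 28 = len * 8
	 * i.e. 8 ns per byte, which matches 125 Mbyte/s exactly.
	 */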
1331 	if (r->rate_bytes_ps > 0) {
1332 		u64 factor = NSEC_PER_SEC;
1333 
1334 		for (;;) {
1335 			r->mult = div64_u64(factor, r->rate_bytes_ps);
1336 			if (r->mult & (1U << 31) || factor & (1ULL << 63))
1337 				break;
1338 			factor <<= 1;
1339 			r->shift++;
1340 		}
1341 	}
1342 }
1343 EXPORT_SYMBOL(psched_ratecfg_precompute);
1344 
1345 static void mini_qdisc_rcu_func(struct rcu_head *head)
1346 {
1347 }
1348 
1349 void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1350 			  struct tcf_proto *tp_head)
1351 {
1352 	/* Protected with chain0->filter_chain_lock.
1353 	 * Can't access chain directly because tp_head can be NULL.
1354 	 */
1355 	struct mini_Qdisc *miniq_old =
1356 		rcu_dereference_protected(*miniqp->p_miniq, 1);
1357 	struct mini_Qdisc *miniq;
1358 
1359 	if (!tp_head) {
1360 		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
1361 		/* Wait for any in-flight RCU callback before it is freed. */
1362 		rcu_barrier();
1363 		return;
1364 	}
1365 
1366 	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
1367 		&miniqp->miniq1 : &miniqp->miniq2;
1368 
1369 	/* We need to make sure that readers won't see the miniq
1370 	 * we are about to modify. So wait until previous call_rcu callback
1371 	 * is done.
1372 	 */
1373 	rcu_barrier();
1374 	miniq->filter_list = tp_head;
1375 	rcu_assign_pointer(*miniqp->p_miniq, miniq);
1376 
1377 	if (miniq_old)
1378 		/* This is the counterpart of the rcu barriers above. We need to
1379 		 * block potential new users of miniq_old until all readers
1380 		 * no longer see it.
1381 		 */
1382 		call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
1383 }
1384 EXPORT_SYMBOL(mini_qdisc_pair_swap);
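
/* The pair keeps two mini_Qdisc buffers and ping-pongs between them on each
 * filter chain update: the currently inactive buffer is rewritten, published
 * with rcu_assign_pointer(), and the previously active one is retired via
 * call_rcu(), so readers on the ingress/clsact fast path never observe a
 * half-updated miniq.
 */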
1385 
1386 void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
1387 			  struct mini_Qdisc __rcu **p_miniq)
1388 {
1389 	miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
1390 	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
1391 	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
1392 	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
1393 	miniqp->p_miniq = p_miniq;
1394 }
1395 EXPORT_SYMBOL(mini_qdisc_pair_init);
1396