xref: /openbmc/linux/net/sched/sch_sfq.c (revision 060f35a317ef09101b128f399dce7ed13d019461)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
61da177e4SLinus Torvalds  */
71da177e4SLinus Torvalds 
81da177e4SLinus Torvalds #include <linux/module.h>
91da177e4SLinus Torvalds #include <linux/types.h>
101da177e4SLinus Torvalds #include <linux/kernel.h>
111da177e4SLinus Torvalds #include <linux/jiffies.h>
121da177e4SLinus Torvalds #include <linux/string.h>
131da177e4SLinus Torvalds #include <linux/in.h>
141da177e4SLinus Torvalds #include <linux/errno.h>
151da177e4SLinus Torvalds #include <linux/init.h>
160ba48053SPatrick McHardy #include <linux/skbuff.h>
1755667441SEric Dumazet #include <linux/siphash.h>
185a0e3ad6STejun Heo #include <linux/slab.h>
19817fb15dSEric Dumazet #include <linux/vmalloc.h>
20dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
211da177e4SLinus Torvalds #include <net/pkt_sched.h>
22cf1facdaSJiri Pirko #include <net/pkt_cls.h>
23ddecf0f4SEric Dumazet #include <net/red.h>
241da177e4SLinus Torvalds 
251da177e4SLinus Torvalds 
261da177e4SLinus Torvalds /*	Stochastic Fairness Queuing algorithm.
271da177e4SLinus Torvalds 	=======================================
281da177e4SLinus Torvalds 
291da177e4SLinus Torvalds 	Source:
301da177e4SLinus Torvalds 	Paul E. McKenney "Stochastic Fairness Queuing",
311da177e4SLinus Torvalds 	IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
321da177e4SLinus Torvalds 
331da177e4SLinus Torvalds 	Paul E. McKenney "Stochastic Fairness Queuing",
341da177e4SLinus Torvalds 	"Interworking: Research and Experience", v.2, 1991, p.113-131.
351da177e4SLinus Torvalds 
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	See also:
381da177e4SLinus Torvalds 	M. Shreedhar and George Varghese "Efficient Fair
391da177e4SLinus Torvalds 	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
401da177e4SLinus Torvalds 
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds 	This is not the thing that is usually called (W)FQ nowadays.
431da177e4SLinus Torvalds 	It does not use any timestamp mechanism, but instead
441da177e4SLinus Torvalds 	processes queues in round-robin order.
451da177e4SLinus Torvalds 
461da177e4SLinus Torvalds 	ADVANTAGE:
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds 	- It is very cheap. Both CPU and memory requirements are minimal.
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds 	DRAWBACKS:
511da177e4SLinus Torvalds 
521da177e4SLinus Torvalds 	- "Stochastic" -> It is not 100% fair.
531da177e4SLinus Torvalds 	When hash collisions occur, several flows are considered as one.
541da177e4SLinus Torvalds 
551da177e4SLinus Torvalds 	- "Round-robin" -> It introduces larger delays than virtual clock
561da177e4SLinus Torvalds 	based schemes, and should not be used for isolating interactive
571da177e4SLinus Torvalds 	traffic	from non-interactive. It means, that this scheduler
581da177e4SLinus Torvalds 	should be used as leaf of CBQ or P3, which put interactive traffic
591da177e4SLinus Torvalds 	to higher priority band.
601da177e4SLinus Torvalds 
611da177e4SLinus Torvalds 	We still need true WFQ for top level CSZ, but using WFQ
621da177e4SLinus Torvalds 	for the best effort traffic is absolutely pointless:
631da177e4SLinus Torvalds 	SFQ is superior for this purpose.
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds 	IMPLEMENTATION:
6618cb8098SEric Dumazet 	This implementation limits :
6718cb8098SEric Dumazet 	- maximal queue length per flow to 127 packets.
6818cb8098SEric Dumazet 	- max mtu to 2^18-1;
6918cb8098SEric Dumazet 	- max 65408 flows,
7018cb8098SEric Dumazet 	- number of hash buckets to 65536.
711da177e4SLinus Torvalds 
721da177e4SLinus Torvalds 	It is easy to increase these values, but not in flight.  */
731da177e4SLinus Torvalds 
7418cb8098SEric Dumazet #define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
7518cb8098SEric Dumazet #define SFQ_DEFAULT_FLOWS	128
7618cb8098SEric Dumazet #define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
7718cb8098SEric Dumazet #define SFQ_EMPTY_SLOT		0xffff
78817fb15dSEric Dumazet #define SFQ_DEFAULT_HASH_DIVISOR 1024
79817fb15dSEric Dumazet 
8018cb8098SEric Dumazet /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
8118cb8098SEric Dumazet typedef u16 sfq_index;
821da177e4SLinus Torvalds 
83eda83e3bSEric Dumazet /*
84eda83e3bSEric Dumazet  * We dont use pointers to save space.
8518cb8098SEric Dumazet  * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
8618cb8098SEric Dumazet  * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
87eda83e3bSEric Dumazet  * are 'pointers' to dep[] array
88eda83e3bSEric Dumazet  */
89cc7ec456SEric Dumazet struct sfq_head {
901da177e4SLinus Torvalds 	sfq_index	next;
911da177e4SLinus Torvalds 	sfq_index	prev;
921da177e4SLinus Torvalds };
931da177e4SLinus Torvalds 
94eda83e3bSEric Dumazet struct sfq_slot {
95eda83e3bSEric Dumazet 	struct sk_buff	*skblist_next;
96eda83e3bSEric Dumazet 	struct sk_buff	*skblist_prev;
97eda83e3bSEric Dumazet 	sfq_index	qlen; /* number of skbs in skblist */
9818cb8098SEric Dumazet 	sfq_index	next; /* next slot in sfq RR chain */
99eda83e3bSEric Dumazet 	struct sfq_head dep; /* anchor in dep[] chains */
100eda83e3bSEric Dumazet 	unsigned short	hash; /* hash value (index in ht[]) */
10158ae7465SEric Dumazet 	int		allot; /* credit for this slot */
102ddecf0f4SEric Dumazet 
103ddecf0f4SEric Dumazet 	unsigned int    backlog;
104ddecf0f4SEric Dumazet 	struct red_vars vars;
105eda83e3bSEric Dumazet };
106eda83e3bSEric Dumazet 
107cc7ec456SEric Dumazet struct sfq_sched_data {
10818cb8098SEric Dumazet /* frequently used fields */
10918cb8098SEric Dumazet 	int		limit;		/* limit of total number of packets in this qdisc */
110817fb15dSEric Dumazet 	unsigned int	divisor;	/* number of slots in hash table */
111ddecf0f4SEric Dumazet 	u8		headdrop;
112ddecf0f4SEric Dumazet 	u8		maxdepth;	/* limit of packets per flow */
11318cb8098SEric Dumazet 
11455667441SEric Dumazet 	siphash_key_t 	perturbation;
115ddecf0f4SEric Dumazet 	u8		cur_depth;	/* depth of longest slot */
116ddecf0f4SEric Dumazet 	u8		flags;
11725d8c0d5SJohn Fastabend 	struct tcf_proto __rcu *filter_list;
1186529eabaSJiri Pirko 	struct tcf_block *block;
11918cb8098SEric Dumazet 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
12018cb8098SEric Dumazet 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
12118cb8098SEric Dumazet 
122ddecf0f4SEric Dumazet 	struct red_parms *red_parms;
123ddecf0f4SEric Dumazet 	struct tc_sfqred_stats stats;
124ddecf0f4SEric Dumazet 	struct sfq_slot *tail;		/* current slot in round */
125ddecf0f4SEric Dumazet 
12618cb8098SEric Dumazet 	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
12718cb8098SEric Dumazet 					/* Linked lists of slots, indexed by depth
12818cb8098SEric Dumazet 					 * dep[0] : list of unused flows
12918cb8098SEric Dumazet 					 * dep[1] : list of flows with 1 packet
13018cb8098SEric Dumazet 					 * dep[X] : list of flows with X packets
13118cb8098SEric Dumazet 					 */
13218cb8098SEric Dumazet 
133ddecf0f4SEric Dumazet 	unsigned int	maxflows;	/* number of flows in flows array */
13418cb8098SEric Dumazet 	int		perturb_period;
13518cb8098SEric Dumazet 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
13618cb8098SEric Dumazet 	struct timer_list perturb_timer;
137cdeabbb8SKees Cook 	struct Qdisc	*sch;
1381da177e4SLinus Torvalds };
1391da177e4SLinus Torvalds 
140eda83e3bSEric Dumazet /*
141eda83e3bSEric Dumazet  * sfq_head are either in a sfq_slot or in dep[] array
142eda83e3bSEric Dumazet  */
sfq_dep_head(struct sfq_sched_data * q,sfq_index val)143eda83e3bSEric Dumazet static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
144eda83e3bSEric Dumazet {
14518cb8098SEric Dumazet 	if (val < SFQ_MAX_FLOWS)
146eda83e3bSEric Dumazet 		return &q->slots[val].dep;
14718cb8098SEric Dumazet 	return &q->dep[val - SFQ_MAX_FLOWS];
148eda83e3bSEric Dumazet }
149eda83e3bSEric Dumazet 
sfq_hash(const struct sfq_sched_data * q,const struct sk_buff * skb)15011fca931SEric Dumazet static unsigned int sfq_hash(const struct sfq_sched_data *q,
15111fca931SEric Dumazet 			     const struct sk_buff *skb)
1521da177e4SLinus Torvalds {
15355667441SEric Dumazet 	return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
1541da177e4SLinus Torvalds }
1551da177e4SLinus Torvalds 
sfq_classify(struct sk_buff * skb,struct Qdisc * sch,int * qerr)1567d2681a6SPatrick McHardy static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
1577d2681a6SPatrick McHardy 				 int *qerr)
1587d2681a6SPatrick McHardy {
1597d2681a6SPatrick McHardy 	struct sfq_sched_data *q = qdisc_priv(sch);
1607d2681a6SPatrick McHardy 	struct tcf_result res;
16125d8c0d5SJohn Fastabend 	struct tcf_proto *fl;
1627d2681a6SPatrick McHardy 	int result;
1637d2681a6SPatrick McHardy 
1647d2681a6SPatrick McHardy 	if (TC_H_MAJ(skb->priority) == sch->handle &&
1657d2681a6SPatrick McHardy 	    TC_H_MIN(skb->priority) > 0 &&
166817fb15dSEric Dumazet 	    TC_H_MIN(skb->priority) <= q->divisor)
1677d2681a6SPatrick McHardy 		return TC_H_MIN(skb->priority);
1687d2681a6SPatrick McHardy 
16925d8c0d5SJohn Fastabend 	fl = rcu_dereference_bh(q->filter_list);
170ada1dba0STom Herbert 	if (!fl)
1717d2681a6SPatrick McHardy 		return sfq_hash(q, skb) + 1;
1727d2681a6SPatrick McHardy 
173c27f339aSJarek Poplawski 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1743aa26055SDavide Caratti 	result = tcf_classify(skb, NULL, fl, &res, false);
1757d2681a6SPatrick McHardy 	if (result >= 0) {
1767d2681a6SPatrick McHardy #ifdef CONFIG_NET_CLS_ACT
1777d2681a6SPatrick McHardy 		switch (result) {
1787d2681a6SPatrick McHardy 		case TC_ACT_STOLEN:
1797d2681a6SPatrick McHardy 		case TC_ACT_QUEUED:
180e25ea21fSJiri Pirko 		case TC_ACT_TRAP:
181378a2f09SJarek Poplawski 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
182964201deSGustavo A. R. Silva 			fallthrough;
1837d2681a6SPatrick McHardy 		case TC_ACT_SHOT:
1847d2681a6SPatrick McHardy 			return 0;
1857d2681a6SPatrick McHardy 		}
1867d2681a6SPatrick McHardy #endif
187817fb15dSEric Dumazet 		if (TC_H_MIN(res.classid) <= q->divisor)
1887d2681a6SPatrick McHardy 			return TC_H_MIN(res.classid);
1897d2681a6SPatrick McHardy 	}
1907d2681a6SPatrick McHardy 	return 0;
1917d2681a6SPatrick McHardy }
1927d2681a6SPatrick McHardy 
193eda83e3bSEric Dumazet /*
19418cb8098SEric Dumazet  * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
195eda83e3bSEric Dumazet  */
sfq_link(struct sfq_sched_data * q,sfq_index x)1961da177e4SLinus Torvalds static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
1971da177e4SLinus Torvalds {
1981da177e4SLinus Torvalds 	sfq_index p, n;
19918cb8098SEric Dumazet 	struct sfq_slot *slot = &q->slots[x];
20018cb8098SEric Dumazet 	int qlen = slot->qlen;
2011da177e4SLinus Torvalds 
20218cb8098SEric Dumazet 	p = qlen + SFQ_MAX_FLOWS;
203eda83e3bSEric Dumazet 	n = q->dep[qlen].next;
204eda83e3bSEric Dumazet 
20518cb8098SEric Dumazet 	slot->dep.next = n;
20618cb8098SEric Dumazet 	slot->dep.prev = p;
207eda83e3bSEric Dumazet 
208eda83e3bSEric Dumazet 	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
209eda83e3bSEric Dumazet 	sfq_dep_head(q, n)->prev = x;
2101da177e4SLinus Torvalds }
2111da177e4SLinus Torvalds 
212eda83e3bSEric Dumazet #define sfq_unlink(q, x, n, p)			\
213fa08943bSYang Yingliang 	do {					\
214eda83e3bSEric Dumazet 		n = q->slots[x].dep.next;	\
215eda83e3bSEric Dumazet 		p = q->slots[x].dep.prev;	\
216eda83e3bSEric Dumazet 		sfq_dep_head(q, p)->next = n;	\
217fa08943bSYang Yingliang 		sfq_dep_head(q, n)->prev = p;	\
218fa08943bSYang Yingliang 	} while (0)
219eda83e3bSEric Dumazet 
220eda83e3bSEric Dumazet 
sfq_dec(struct sfq_sched_data * q,sfq_index x)2211da177e4SLinus Torvalds static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
2221da177e4SLinus Torvalds {
2231da177e4SLinus Torvalds 	sfq_index p, n;
224eda83e3bSEric Dumazet 	int d;
2251da177e4SLinus Torvalds 
226eda83e3bSEric Dumazet 	sfq_unlink(q, x, n, p);
2271da177e4SLinus Torvalds 
228eda83e3bSEric Dumazet 	d = q->slots[x].qlen--;
229eda83e3bSEric Dumazet 	if (n == p && q->cur_depth == d)
230eda83e3bSEric Dumazet 		q->cur_depth--;
2311da177e4SLinus Torvalds 	sfq_link(q, x);
2321da177e4SLinus Torvalds }
2331da177e4SLinus Torvalds 
sfq_inc(struct sfq_sched_data * q,sfq_index x)2341da177e4SLinus Torvalds static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
2351da177e4SLinus Torvalds {
2361da177e4SLinus Torvalds 	sfq_index p, n;
2371da177e4SLinus Torvalds 	int d;
2381da177e4SLinus Torvalds 
239eda83e3bSEric Dumazet 	sfq_unlink(q, x, n, p);
2401da177e4SLinus Torvalds 
241eda83e3bSEric Dumazet 	d = ++q->slots[x].qlen;
242eda83e3bSEric Dumazet 	if (q->cur_depth < d)
243eda83e3bSEric Dumazet 		q->cur_depth = d;
2441da177e4SLinus Torvalds 	sfq_link(q, x);
2451da177e4SLinus Torvalds }
2461da177e4SLinus Torvalds 
247eda83e3bSEric Dumazet /* helper functions : might be changed when/if skb use a standard list_head */
248eda83e3bSEric Dumazet 
249eda83e3bSEric Dumazet /* remove one skb from tail of slot queue */
slot_dequeue_tail(struct sfq_slot * slot)250eda83e3bSEric Dumazet static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
251eda83e3bSEric Dumazet {
252eda83e3bSEric Dumazet 	struct sk_buff *skb = slot->skblist_prev;
253eda83e3bSEric Dumazet 
254eda83e3bSEric Dumazet 	slot->skblist_prev = skb->prev;
255ee09b3c1SEric Dumazet 	skb->prev->next = (struct sk_buff *)slot;
256eda83e3bSEric Dumazet 	skb->next = skb->prev = NULL;
257eda83e3bSEric Dumazet 	return skb;
258eda83e3bSEric Dumazet }
259eda83e3bSEric Dumazet 
260eda83e3bSEric Dumazet /* remove one skb from head of slot queue */
slot_dequeue_head(struct sfq_slot * slot)261eda83e3bSEric Dumazet static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
262eda83e3bSEric Dumazet {
263eda83e3bSEric Dumazet 	struct sk_buff *skb = slot->skblist_next;
264eda83e3bSEric Dumazet 
265eda83e3bSEric Dumazet 	slot->skblist_next = skb->next;
26618c8d82aSEric Dumazet 	skb->next->prev = (struct sk_buff *)slot;
267eda83e3bSEric Dumazet 	skb->next = skb->prev = NULL;
268eda83e3bSEric Dumazet 	return skb;
269eda83e3bSEric Dumazet }
270eda83e3bSEric Dumazet 
slot_queue_init(struct sfq_slot * slot)271eda83e3bSEric Dumazet static inline void slot_queue_init(struct sfq_slot *slot)
272eda83e3bSEric Dumazet {
27318cb8098SEric Dumazet 	memset(slot, 0, sizeof(*slot));
274eda83e3bSEric Dumazet 	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
275eda83e3bSEric Dumazet }
276eda83e3bSEric Dumazet 
277eda83e3bSEric Dumazet /* add skb to slot queue (tail add) */
slot_queue_add(struct sfq_slot * slot,struct sk_buff * skb)278eda83e3bSEric Dumazet static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
279eda83e3bSEric Dumazet {
280eda83e3bSEric Dumazet 	skb->prev = slot->skblist_prev;
281eda83e3bSEric Dumazet 	skb->next = (struct sk_buff *)slot;
282eda83e3bSEric Dumazet 	slot->skblist_prev->next = skb;
283eda83e3bSEric Dumazet 	slot->skblist_prev = skb;
284eda83e3bSEric Dumazet }
285eda83e3bSEric Dumazet 
sfq_drop(struct Qdisc * sch,struct sk_buff ** to_free)286f9ab7425SGao Feng static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
2871da177e4SLinus Torvalds {
2881da177e4SLinus Torvalds 	struct sfq_sched_data *q = qdisc_priv(sch);
289eda83e3bSEric Dumazet 	sfq_index x, d = q->cur_depth;
2901da177e4SLinus Torvalds 	struct sk_buff *skb;
2911da177e4SLinus Torvalds 	unsigned int len;
292eda83e3bSEric Dumazet 	struct sfq_slot *slot;
2931da177e4SLinus Torvalds 
294eda83e3bSEric Dumazet 	/* Queue is full! Find the longest slot and drop tail packet from it */
2951da177e4SLinus Torvalds 	if (d > 1) {
296eda83e3bSEric Dumazet 		x = q->dep[d].next;
297eda83e3bSEric Dumazet 		slot = &q->slots[x];
298eda83e3bSEric Dumazet drop:
29918cb8098SEric Dumazet 		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
3000abf77e5SJussi Kivilinna 		len = qdisc_pkt_len(skb);
301ddecf0f4SEric Dumazet 		slot->backlog -= len;
3021da177e4SLinus Torvalds 		sfq_dec(q, x);
3031da177e4SLinus Torvalds 		sch->q.qlen--;
30425331d6cSJohn Fastabend 		qdisc_qstats_backlog_dec(sch, skb);
305f9ab7425SGao Feng 		qdisc_drop(skb, sch, to_free);
3061da177e4SLinus Torvalds 		return len;
3071da177e4SLinus Torvalds 	}
3081da177e4SLinus Torvalds 
3091da177e4SLinus Torvalds 	if (d == 1) {
3101da177e4SLinus Torvalds 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
311eda83e3bSEric Dumazet 		x = q->tail->next;
312eda83e3bSEric Dumazet 		slot = &q->slots[x];
313eda83e3bSEric Dumazet 		q->tail->next = slot->next;
314eda83e3bSEric Dumazet 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
315eda83e3bSEric Dumazet 		goto drop;
3161da177e4SLinus Torvalds 	}
3171da177e4SLinus Torvalds 
3181da177e4SLinus Torvalds 	return 0;
3191da177e4SLinus Torvalds }
3201da177e4SLinus Torvalds 
321ddecf0f4SEric Dumazet /* Is ECN parameter configured */
sfq_prob_mark(const struct sfq_sched_data * q)322ddecf0f4SEric Dumazet static int sfq_prob_mark(const struct sfq_sched_data *q)
323ddecf0f4SEric Dumazet {
324ddecf0f4SEric Dumazet 	return q->flags & TC_RED_ECN;
325ddecf0f4SEric Dumazet }
326ddecf0f4SEric Dumazet 
327ddecf0f4SEric Dumazet /* Should packets over max threshold just be marked */
sfq_hard_mark(const struct sfq_sched_data * q)328ddecf0f4SEric Dumazet static int sfq_hard_mark(const struct sfq_sched_data *q)
329ddecf0f4SEric Dumazet {
330ddecf0f4SEric Dumazet 	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
331ddecf0f4SEric Dumazet }
332ddecf0f4SEric Dumazet 
sfq_headdrop(const struct sfq_sched_data * q)333ddecf0f4SEric Dumazet static int sfq_headdrop(const struct sfq_sched_data *q)
334ddecf0f4SEric Dumazet {
335ddecf0f4SEric Dumazet 	return q->headdrop;
336ddecf0f4SEric Dumazet }
337ddecf0f4SEric Dumazet 
3381da177e4SLinus Torvalds static int
sfq_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)339520ac30fSEric Dumazet sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
3401da177e4SLinus Torvalds {
3411da177e4SLinus Torvalds 	struct sfq_sched_data *q = qdisc_priv(sch);
3422ccccf5fSWANG Cong 	unsigned int hash, dropped;
3438efa8854SEric Dumazet 	sfq_index x, qlen;
344eda83e3bSEric Dumazet 	struct sfq_slot *slot;
3453f649ab7SKees Cook 	int ret;
346ddecf0f4SEric Dumazet 	struct sk_buff *head;
347ddecf0f4SEric Dumazet 	int delta;
3487d2681a6SPatrick McHardy 
3497d2681a6SPatrick McHardy 	hash = sfq_classify(skb, sch, &ret);
3507d2681a6SPatrick McHardy 	if (hash == 0) {
351c27f339aSJarek Poplawski 		if (ret & __NET_XMIT_BYPASS)
35225331d6cSJohn Fastabend 			qdisc_qstats_drop(sch);
353f9ab7425SGao Feng 		__qdisc_drop(skb, to_free);
3547d2681a6SPatrick McHardy 		return ret;
3557d2681a6SPatrick McHardy 	}
3567d2681a6SPatrick McHardy 	hash--;
3571da177e4SLinus Torvalds 
3581da177e4SLinus Torvalds 	x = q->ht[hash];
359eda83e3bSEric Dumazet 	slot = &q->slots[x];
360eda83e3bSEric Dumazet 	if (x == SFQ_EMPTY_SLOT) {
361eda83e3bSEric Dumazet 		x = q->dep[0].next; /* get a free slot */
36218cb8098SEric Dumazet 		if (x >= SFQ_MAX_FLOWS)
363520ac30fSEric Dumazet 			return qdisc_drop(skb, sch, to_free);
364eda83e3bSEric Dumazet 		q->ht[hash] = x;
365eda83e3bSEric Dumazet 		slot = &q->slots[x];
366eda83e3bSEric Dumazet 		slot->hash = hash;
367ddecf0f4SEric Dumazet 		slot->backlog = 0; /* should already be 0 anyway... */
368ddecf0f4SEric Dumazet 		red_set_vars(&slot->vars);
369ddecf0f4SEric Dumazet 		goto enqueue;
370ddecf0f4SEric Dumazet 	}
371ddecf0f4SEric Dumazet 	if (q->red_parms) {
372ddecf0f4SEric Dumazet 		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
373ddecf0f4SEric Dumazet 							&slot->vars,
374ddecf0f4SEric Dumazet 							slot->backlog);
375ddecf0f4SEric Dumazet 		switch (red_action(q->red_parms,
376ddecf0f4SEric Dumazet 				   &slot->vars,
377ddecf0f4SEric Dumazet 				   slot->vars.qavg)) {
378ddecf0f4SEric Dumazet 		case RED_DONT_MARK:
379ddecf0f4SEric Dumazet 			break;
380ddecf0f4SEric Dumazet 
381ddecf0f4SEric Dumazet 		case RED_PROB_MARK:
38225331d6cSJohn Fastabend 			qdisc_qstats_overlimit(sch);
383ddecf0f4SEric Dumazet 			if (sfq_prob_mark(q)) {
384ddecf0f4SEric Dumazet 				/* We know we have at least one packet in queue */
385ddecf0f4SEric Dumazet 				if (sfq_headdrop(q) &&
386ddecf0f4SEric Dumazet 				    INET_ECN_set_ce(slot->skblist_next)) {
387ddecf0f4SEric Dumazet 					q->stats.prob_mark_head++;
388ddecf0f4SEric Dumazet 					break;
389ddecf0f4SEric Dumazet 				}
390ddecf0f4SEric Dumazet 				if (INET_ECN_set_ce(skb)) {
391ddecf0f4SEric Dumazet 					q->stats.prob_mark++;
392ddecf0f4SEric Dumazet 					break;
393ddecf0f4SEric Dumazet 				}
394ddecf0f4SEric Dumazet 			}
395ddecf0f4SEric Dumazet 			q->stats.prob_drop++;
396ddecf0f4SEric Dumazet 			goto congestion_drop;
397ddecf0f4SEric Dumazet 
398ddecf0f4SEric Dumazet 		case RED_HARD_MARK:
39925331d6cSJohn Fastabend 			qdisc_qstats_overlimit(sch);
400ddecf0f4SEric Dumazet 			if (sfq_hard_mark(q)) {
401ddecf0f4SEric Dumazet 				/* We know we have at least one packet in queue */
402ddecf0f4SEric Dumazet 				if (sfq_headdrop(q) &&
403ddecf0f4SEric Dumazet 				    INET_ECN_set_ce(slot->skblist_next)) {
404ddecf0f4SEric Dumazet 					q->stats.forced_mark_head++;
405ddecf0f4SEric Dumazet 					break;
406ddecf0f4SEric Dumazet 				}
407ddecf0f4SEric Dumazet 				if (INET_ECN_set_ce(skb)) {
408ddecf0f4SEric Dumazet 					q->stats.forced_mark++;
409ddecf0f4SEric Dumazet 					break;
410ddecf0f4SEric Dumazet 				}
411ddecf0f4SEric Dumazet 			}
412ddecf0f4SEric Dumazet 			q->stats.forced_drop++;
413ddecf0f4SEric Dumazet 			goto congestion_drop;
414ddecf0f4SEric Dumazet 		}
4151da177e4SLinus Torvalds 	}
4166f9e98f7SStephen Hemminger 
41718cb8098SEric Dumazet 	if (slot->qlen >= q->maxdepth) {
418ddecf0f4SEric Dumazet congestion_drop:
419ddecf0f4SEric Dumazet 		if (!sfq_headdrop(q))
420520ac30fSEric Dumazet 			return qdisc_drop(skb, sch, to_free);
42132740ddcSAlexey Kuznetsov 
422ddecf0f4SEric Dumazet 		/* We know we have at least one packet in queue */
42318cb8098SEric Dumazet 		head = slot_dequeue_head(slot);
424ddecf0f4SEric Dumazet 		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
425ddecf0f4SEric Dumazet 		sch->qstats.backlog -= delta;
426ddecf0f4SEric Dumazet 		slot->backlog -= delta;
427520ac30fSEric Dumazet 		qdisc_drop(head, sch, to_free);
42818cb8098SEric Dumazet 
42918cb8098SEric Dumazet 		slot_queue_add(slot, skb);
430325d5dc3SKonstantin Khlebnikov 		qdisc_tree_reduce_backlog(sch, 0, delta);
43118cb8098SEric Dumazet 		return NET_XMIT_CN;
43218cb8098SEric Dumazet 	}
43318cb8098SEric Dumazet 
434ddecf0f4SEric Dumazet enqueue:
43525331d6cSJohn Fastabend 	qdisc_qstats_backlog_inc(sch, skb);
436ddecf0f4SEric Dumazet 	slot->backlog += qdisc_pkt_len(skb);
437eda83e3bSEric Dumazet 	slot_queue_add(slot, skb);
4381da177e4SLinus Torvalds 	sfq_inc(q, x);
439eda83e3bSEric Dumazet 	if (slot->qlen == 1) {		/* The flow is new */
440eda83e3bSEric Dumazet 		if (q->tail == NULL) {	/* It is the first flow */
441eda83e3bSEric Dumazet 			slot->next = x;
4421da177e4SLinus Torvalds 		} else {
443eda83e3bSEric Dumazet 			slot->next = q->tail->next;
444eda83e3bSEric Dumazet 			q->tail->next = x;
4451da177e4SLinus Torvalds 		}
446cc34eb67SEric Dumazet 		/* We put this flow at the end of our flow list.
447cc34eb67SEric Dumazet 		 * This might sound unfair for a new flow to wait after old ones,
448cc34eb67SEric Dumazet 		 * but we could endup servicing new flows only, and freeze old ones.
449cc34eb67SEric Dumazet 		 */
450cc34eb67SEric Dumazet 		q->tail = slot;
451ddecf0f4SEric Dumazet 		/* We could use a bigger initial quantum for new flows */
45258ae7465SEric Dumazet 		slot->allot = q->quantum;
4531da177e4SLinus Torvalds 	}
4549190b3b3SEric Dumazet 	if (++sch->q.qlen <= q->limit)
4559871e50eSBen Greear 		return NET_XMIT_SUCCESS;
4561da177e4SLinus Torvalds 
4578efa8854SEric Dumazet 	qlen = slot->qlen;
458f9ab7425SGao Feng 	dropped = sfq_drop(sch, to_free);
4598efa8854SEric Dumazet 	/* Return Congestion Notification only if we dropped a packet
4608efa8854SEric Dumazet 	 * from this flow.
4618efa8854SEric Dumazet 	 */
462325d5dc3SKonstantin Khlebnikov 	if (qlen != slot->qlen) {
463325d5dc3SKonstantin Khlebnikov 		qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
464e1738bd9SEric Dumazet 		return NET_XMIT_CN;
465325d5dc3SKonstantin Khlebnikov 	}
466e1738bd9SEric Dumazet 
467e1738bd9SEric Dumazet 	/* As we dropped a packet, better let upper stack know this */
4682ccccf5fSWANG Cong 	qdisc_tree_reduce_backlog(sch, 1, dropped);
469e1738bd9SEric Dumazet 	return NET_XMIT_SUCCESS;
4701da177e4SLinus Torvalds }
4711da177e4SLinus Torvalds 
47248a8f519SPatrick McHardy static struct sk_buff *
sfq_dequeue(struct Qdisc * sch)4731da177e4SLinus Torvalds sfq_dequeue(struct Qdisc *sch)
4741da177e4SLinus Torvalds {
4751da177e4SLinus Torvalds 	struct sfq_sched_data *q = qdisc_priv(sch);
4761da177e4SLinus Torvalds 	struct sk_buff *skb;
477aa3e2199SEric Dumazet 	sfq_index a, next_a;
478eda83e3bSEric Dumazet 	struct sfq_slot *slot;
4791da177e4SLinus Torvalds 
4801da177e4SLinus Torvalds 	/* No active slots */
481eda83e3bSEric Dumazet 	if (q->tail == NULL)
4821da177e4SLinus Torvalds 		return NULL;
4831da177e4SLinus Torvalds 
484eeaeb068SEric Dumazet next_slot:
485eda83e3bSEric Dumazet 	a = q->tail->next;
486eda83e3bSEric Dumazet 	slot = &q->slots[a];
487eeaeb068SEric Dumazet 	if (slot->allot <= 0) {
488eeaeb068SEric Dumazet 		q->tail = slot;
48958ae7465SEric Dumazet 		slot->allot += q->quantum;
490eeaeb068SEric Dumazet 		goto next_slot;
491eeaeb068SEric Dumazet 	}
492eda83e3bSEric Dumazet 	skb = slot_dequeue_head(slot);
4931da177e4SLinus Torvalds 	sfq_dec(q, a);
4949190b3b3SEric Dumazet 	qdisc_bstats_update(sch, skb);
4951da177e4SLinus Torvalds 	sch->q.qlen--;
49625331d6cSJohn Fastabend 	qdisc_qstats_backlog_dec(sch, skb);
497ddecf0f4SEric Dumazet 	slot->backlog -= qdisc_pkt_len(skb);
4981da177e4SLinus Torvalds 	/* Is the slot empty? */
499eda83e3bSEric Dumazet 	if (slot->qlen == 0) {
500eda83e3bSEric Dumazet 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
501eda83e3bSEric Dumazet 		next_a = slot->next;
502aa3e2199SEric Dumazet 		if (a == next_a) {
503eda83e3bSEric Dumazet 			q->tail = NULL; /* no more active slots */
5041da177e4SLinus Torvalds 			return skb;
5051da177e4SLinus Torvalds 		}
506eda83e3bSEric Dumazet 		q->tail->next = next_a;
507eeaeb068SEric Dumazet 	} else {
50858ae7465SEric Dumazet 		slot->allot -= qdisc_pkt_len(skb);
5091da177e4SLinus Torvalds 	}
5101da177e4SLinus Torvalds 	return skb;
5111da177e4SLinus Torvalds }
5121da177e4SLinus Torvalds 
5131da177e4SLinus Torvalds static void
sfq_reset(struct Qdisc * sch)5141da177e4SLinus Torvalds sfq_reset(struct Qdisc *sch)
5151da177e4SLinus Torvalds {
5161da177e4SLinus Torvalds 	struct sk_buff *skb;
5171da177e4SLinus Torvalds 
5181da177e4SLinus Torvalds 	while ((skb = sfq_dequeue(sch)) != NULL)
519fea02478SEric Dumazet 		rtnl_kfree_skbs(skb, skb);
5201da177e4SLinus Torvalds }
5211da177e4SLinus Torvalds 
522225d9b89SEric Dumazet /*
523225d9b89SEric Dumazet  * When q->perturbation is changed, we rehash all queued skbs
524225d9b89SEric Dumazet  * to avoid OOO (Out Of Order) effects.
525225d9b89SEric Dumazet  * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
526225d9b89SEric Dumazet  * counters.
527225d9b89SEric Dumazet  */
sfq_rehash(struct Qdisc * sch)52818cb8098SEric Dumazet static void sfq_rehash(struct Qdisc *sch)
529225d9b89SEric Dumazet {
53018cb8098SEric Dumazet 	struct sfq_sched_data *q = qdisc_priv(sch);
531225d9b89SEric Dumazet 	struct sk_buff *skb;
532225d9b89SEric Dumazet 	int i;
533225d9b89SEric Dumazet 	struct sfq_slot *slot;
534225d9b89SEric Dumazet 	struct sk_buff_head list;
53518cb8098SEric Dumazet 	int dropped = 0;
5362ccccf5fSWANG Cong 	unsigned int drop_len = 0;
537225d9b89SEric Dumazet 
538225d9b89SEric Dumazet 	__skb_queue_head_init(&list);
539225d9b89SEric Dumazet 
54018cb8098SEric Dumazet 	for (i = 0; i < q->maxflows; i++) {
541225d9b89SEric Dumazet 		slot = &q->slots[i];
542225d9b89SEric Dumazet 		if (!slot->qlen)
543225d9b89SEric Dumazet 			continue;
544225d9b89SEric Dumazet 		while (slot->qlen) {
545225d9b89SEric Dumazet 			skb = slot_dequeue_head(slot);
546225d9b89SEric Dumazet 			sfq_dec(q, i);
547225d9b89SEric Dumazet 			__skb_queue_tail(&list, skb);
548225d9b89SEric Dumazet 		}
549ddecf0f4SEric Dumazet 		slot->backlog = 0;
550ddecf0f4SEric Dumazet 		red_set_vars(&slot->vars);
551225d9b89SEric Dumazet 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
552225d9b89SEric Dumazet 	}
553225d9b89SEric Dumazet 	q->tail = NULL;
554225d9b89SEric Dumazet 
555225d9b89SEric Dumazet 	while ((skb = __skb_dequeue(&list)) != NULL) {
556225d9b89SEric Dumazet 		unsigned int hash = sfq_hash(q, skb);
557225d9b89SEric Dumazet 		sfq_index x = q->ht[hash];
558225d9b89SEric Dumazet 
559225d9b89SEric Dumazet 		slot = &q->slots[x];
560225d9b89SEric Dumazet 		if (x == SFQ_EMPTY_SLOT) {
561225d9b89SEric Dumazet 			x = q->dep[0].next; /* get a free slot */
56218cb8098SEric Dumazet 			if (x >= SFQ_MAX_FLOWS) {
56325331d6cSJohn Fastabend drop:
56425331d6cSJohn Fastabend 				qdisc_qstats_backlog_dec(sch, skb);
5652ccccf5fSWANG Cong 				drop_len += qdisc_pkt_len(skb);
56618cb8098SEric Dumazet 				kfree_skb(skb);
56718cb8098SEric Dumazet 				dropped++;
56818cb8098SEric Dumazet 				continue;
56918cb8098SEric Dumazet 			}
570225d9b89SEric Dumazet 			q->ht[hash] = x;
571225d9b89SEric Dumazet 			slot = &q->slots[x];
572225d9b89SEric Dumazet 			slot->hash = hash;
573225d9b89SEric Dumazet 		}
57418cb8098SEric Dumazet 		if (slot->qlen >= q->maxdepth)
57518cb8098SEric Dumazet 			goto drop;
576225d9b89SEric Dumazet 		slot_queue_add(slot, skb);
577ddecf0f4SEric Dumazet 		if (q->red_parms)
578ddecf0f4SEric Dumazet 			slot->vars.qavg = red_calc_qavg(q->red_parms,
579ddecf0f4SEric Dumazet 							&slot->vars,
580ddecf0f4SEric Dumazet 							slot->backlog);
581ddecf0f4SEric Dumazet 		slot->backlog += qdisc_pkt_len(skb);
582225d9b89SEric Dumazet 		sfq_inc(q, x);
583225d9b89SEric Dumazet 		if (slot->qlen == 1) {		/* The flow is new */
584225d9b89SEric Dumazet 			if (q->tail == NULL) {	/* It is the first flow */
585225d9b89SEric Dumazet 				slot->next = x;
586225d9b89SEric Dumazet 			} else {
587225d9b89SEric Dumazet 				slot->next = q->tail->next;
588225d9b89SEric Dumazet 				q->tail->next = x;
589225d9b89SEric Dumazet 			}
590225d9b89SEric Dumazet 			q->tail = slot;
59158ae7465SEric Dumazet 			slot->allot = q->quantum;
592225d9b89SEric Dumazet 		}
593225d9b89SEric Dumazet 	}
59418cb8098SEric Dumazet 	sch->q.qlen -= dropped;
5952ccccf5fSWANG Cong 	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
596225d9b89SEric Dumazet }
597225d9b89SEric Dumazet 
sfq_perturbation(struct timer_list * t)598cdeabbb8SKees Cook static void sfq_perturbation(struct timer_list *t)
5991da177e4SLinus Torvalds {
600cdeabbb8SKees Cook 	struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
601cdeabbb8SKees Cook 	struct Qdisc *sch = q->sch;
602d636fc5dSEric Dumazet 	spinlock_t *root_lock;
60355667441SEric Dumazet 	siphash_key_t nkey;
604ab18d76fSEric Dumazet 	int period;
6051da177e4SLinus Torvalds 
60655667441SEric Dumazet 	get_random_bytes(&nkey, sizeof(nkey));
607d636fc5dSEric Dumazet 	rcu_read_lock();
608d636fc5dSEric Dumazet 	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
609225d9b89SEric Dumazet 	spin_lock(root_lock);
61055667441SEric Dumazet 	q->perturbation = nkey;
611225d9b89SEric Dumazet 	if (!q->filter_list && q->tail)
61218cb8098SEric Dumazet 		sfq_rehash(sch);
613225d9b89SEric Dumazet 	spin_unlock(root_lock);
6141da177e4SLinus Torvalds 
615ab18d76fSEric Dumazet 	/* q->perturb_period can change under us from
616ab18d76fSEric Dumazet 	 * sfq_change() and sfq_destroy().
617ab18d76fSEric Dumazet 	 */
618ab18d76fSEric Dumazet 	period = READ_ONCE(q->perturb_period);
619ab18d76fSEric Dumazet 	if (period)
620ab18d76fSEric Dumazet 		mod_timer(&q->perturb_timer, jiffies + period);
621d636fc5dSEric Dumazet 	rcu_read_unlock();
6221da177e4SLinus Torvalds }
6231da177e4SLinus Torvalds 
sfq_change(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)62458ae7465SEric Dumazet static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
62558ae7465SEric Dumazet 		      struct netlink_ext_ack *extack)
6261da177e4SLinus Torvalds {
6271da177e4SLinus Torvalds 	struct sfq_sched_data *q = qdisc_priv(sch);
6281e90474cSPatrick McHardy 	struct tc_sfq_qopt *ctl = nla_data(opt);
62918cb8098SEric Dumazet 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
6302ccccf5fSWANG Cong 	unsigned int qlen, dropped = 0;
631ddecf0f4SEric Dumazet 	struct red_parms *p = NULL;
632f9ab7425SGao Feng 	struct sk_buff *to_free = NULL;
633f9ab7425SGao Feng 	struct sk_buff *tail = NULL;
6341da177e4SLinus Torvalds 
6351e90474cSPatrick McHardy 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
6361da177e4SLinus Torvalds 		return -EINVAL;
63718cb8098SEric Dumazet 	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
63818cb8098SEric Dumazet 		ctl_v1 = nla_data(opt);
639119b3d38Sstephen hemminger 	if (ctl->divisor &&
640119b3d38Sstephen hemminger 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
641119b3d38Sstephen hemminger 		return -EINVAL;
642df4953e4SEric Dumazet 
64358ae7465SEric Dumazet 	if ((int)ctl->quantum < 0) {
64458ae7465SEric Dumazet 		NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
645df4953e4SEric Dumazet 		return -EINVAL;
646df4953e4SEric Dumazet 	}
6478afa10cbSNogah Frankel 	if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
648e323d865SEric Dumazet 					ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
6498afa10cbSNogah Frankel 		return -EINVAL;
650ddecf0f4SEric Dumazet 	if (ctl_v1 && ctl_v1->qth_min) {
651ddecf0f4SEric Dumazet 		p = kmalloc(sizeof(*p), GFP_KERNEL);
652ddecf0f4SEric Dumazet 		if (!p)
653ddecf0f4SEric Dumazet 			return -ENOMEM;
654ddecf0f4SEric Dumazet 	}
655*833e9a1cSOctavian Purdila 	if (ctl->limit == 1) {
656*833e9a1cSOctavian Purdila 		NL_SET_ERR_MSG_MOD(extack, "invalid limit");
657*833e9a1cSOctavian Purdila 		return -EINVAL;
658*833e9a1cSOctavian Purdila 	}
6591da177e4SLinus Torvalds 	sch_tree_lock(sch);
66058ae7465SEric Dumazet 	if (ctl->quantum)
66118cb8098SEric Dumazet 		q->quantum = ctl->quantum;
662ab18d76fSEric Dumazet 	WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
66318cb8098SEric Dumazet 	if (ctl->flows)
66418cb8098SEric Dumazet 		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
66518cb8098SEric Dumazet 	if (ctl->divisor) {
666817fb15dSEric Dumazet 		q->divisor = ctl->divisor;
66718cb8098SEric Dumazet 		q->maxflows = min_t(u32, q->maxflows, q->divisor);
66818cb8098SEric Dumazet 	}
66918cb8098SEric Dumazet 	if (ctl_v1) {
67018cb8098SEric Dumazet 		if (ctl_v1->depth)
67118cb8098SEric Dumazet 			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
672ddecf0f4SEric Dumazet 		if (p) {
673ddecf0f4SEric Dumazet 			swap(q->red_parms, p);
674ddecf0f4SEric Dumazet 			red_set_parms(q->red_parms,
675ddecf0f4SEric Dumazet 				      ctl_v1->qth_min, ctl_v1->qth_max,
676ddecf0f4SEric Dumazet 				      ctl_v1->Wlog,
677ddecf0f4SEric Dumazet 				      ctl_v1->Plog, ctl_v1->Scell_log,
678ddecf0f4SEric Dumazet 				      NULL,
679ddecf0f4SEric Dumazet 				      ctl_v1->max_P);
680ddecf0f4SEric Dumazet 		}
681ddecf0f4SEric Dumazet 		q->flags = ctl_v1->flags;
68218cb8098SEric Dumazet 		q->headdrop = ctl_v1->headdrop;
68318cb8098SEric Dumazet 	}
68418cb8098SEric Dumazet 	if (ctl->limit) {
68518cb8098SEric Dumazet 		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
68618cb8098SEric Dumazet 		q->maxflows = min_t(u32, q->maxflows, q->limit);
68718cb8098SEric Dumazet 	}
68818cb8098SEric Dumazet 
6895e50da01SPatrick McHardy 	qlen = sch->q.qlen;
690f9ab7425SGao Feng 	while (sch->q.qlen > q->limit) {
691f9ab7425SGao Feng 		dropped += sfq_drop(sch, &to_free);
692f9ab7425SGao Feng 		if (!tail)
693f9ab7425SGao Feng 			tail = to_free;
694f9ab7425SGao Feng 	}
695f9ab7425SGao Feng 
696f9ab7425SGao Feng 	rtnl_kfree_skbs(to_free, tail);
6972ccccf5fSWANG Cong 	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
6981da177e4SLinus Torvalds 
6991da177e4SLinus Torvalds 	del_timer(&q->perturb_timer);
7001da177e4SLinus Torvalds 	if (q->perturb_period) {
70132740ddcSAlexey Kuznetsov 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
70255667441SEric Dumazet 		get_random_bytes(&q->perturbation, sizeof(q->perturbation));
7031da177e4SLinus Torvalds 	}
7041da177e4SLinus Torvalds 	sch_tree_unlock(sch);
705ddecf0f4SEric Dumazet 	kfree(p);
7061da177e4SLinus Torvalds 	return 0;
7071da177e4SLinus Torvalds }
7081da177e4SLinus Torvalds 
sfq_alloc(size_t sz)709bd16a6ccSEric Dumazet static void *sfq_alloc(size_t sz)
710bd16a6ccSEric Dumazet {
711752ade68SMichal Hocko 	return  kvmalloc(sz, GFP_KERNEL);
712bd16a6ccSEric Dumazet }
713bd16a6ccSEric Dumazet 
sfq_free(void * addr)714bd16a6ccSEric Dumazet static void sfq_free(void *addr)
715bd16a6ccSEric Dumazet {
7164cb28970SWANG Cong 	kvfree(addr);
717bd16a6ccSEric Dumazet }
718bd16a6ccSEric Dumazet 
sfq_destroy(struct Qdisc * sch)719bd16a6ccSEric Dumazet static void sfq_destroy(struct Qdisc *sch)
720bd16a6ccSEric Dumazet {
721bd16a6ccSEric Dumazet 	struct sfq_sched_data *q = qdisc_priv(sch);
722bd16a6ccSEric Dumazet 
7236529eabaSJiri Pirko 	tcf_block_put(q->block);
724ab18d76fSEric Dumazet 	WRITE_ONCE(q->perturb_period, 0);
725bd16a6ccSEric Dumazet 	del_timer_sync(&q->perturb_timer);
726bd16a6ccSEric Dumazet 	sfq_free(q->ht);
72718cb8098SEric Dumazet 	sfq_free(q->slots);
728ddecf0f4SEric Dumazet 	kfree(q->red_parms);
729bd16a6ccSEric Dumazet }
730bd16a6ccSEric Dumazet 
sfq_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)731e63d7dfdSAlexander Aring static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
732e63d7dfdSAlexander Aring 		    struct netlink_ext_ack *extack)
7331da177e4SLinus Torvalds {
7341da177e4SLinus Torvalds 	struct sfq_sched_data *q = qdisc_priv(sch);
7351da177e4SLinus Torvalds 	int i;
7366529eabaSJiri Pirko 	int err;
7376529eabaSJiri Pirko 
738f85729d0SPaolo Abeni 	q->sch = sch;
739cdeabbb8SKees Cook 	timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
740e2326576SNikolay Aleksandrov 
7418d1a77f9SAlexander Aring 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
7426529eabaSJiri Pirko 	if (err)
7436529eabaSJiri Pirko 		return err;
7441da177e4SLinus Torvalds 
74518cb8098SEric Dumazet 	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
74618cb8098SEric Dumazet 		q->dep[i].next = i + SFQ_MAX_FLOWS;
74718cb8098SEric Dumazet 		q->dep[i].prev = i + SFQ_MAX_FLOWS;
7481da177e4SLinus Torvalds 	}
7496f9e98f7SStephen Hemminger 
75018cb8098SEric Dumazet 	q->limit = SFQ_MAX_DEPTH;
75118cb8098SEric Dumazet 	q->maxdepth = SFQ_MAX_DEPTH;
752eda83e3bSEric Dumazet 	q->cur_depth = 0;
753eda83e3bSEric Dumazet 	q->tail = NULL;
754817fb15dSEric Dumazet 	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
75518cb8098SEric Dumazet 	q->maxflows = SFQ_DEFAULT_FLOWS;
7565ce2d488SDavid S. Miller 	q->quantum = psched_mtu(qdisc_dev(sch));
7571da177e4SLinus Torvalds 	q->perturb_period = 0;
75855667441SEric Dumazet 	get_random_bytes(&q->perturbation, sizeof(q->perturbation));
75902a9098eSEric Dumazet 
76002a9098eSEric Dumazet 	if (opt) {
76158ae7465SEric Dumazet 		int err = sfq_change(sch, opt, extack);
7621da177e4SLinus Torvalds 		if (err)
7631da177e4SLinus Torvalds 			return err;
7641da177e4SLinus Torvalds 	}
7656f9e98f7SStephen Hemminger 
766bd16a6ccSEric Dumazet 	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
76718cb8098SEric Dumazet 	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
76818cb8098SEric Dumazet 	if (!q->ht || !q->slots) {
76987b60cfaSEric Dumazet 		/* Note: sfq_destroy() will be called by our caller */
770817fb15dSEric Dumazet 		return -ENOMEM;
771bd16a6ccSEric Dumazet 	}
77287b60cfaSEric Dumazet 
773817fb15dSEric Dumazet 	for (i = 0; i < q->divisor; i++)
774817fb15dSEric Dumazet 		q->ht[i] = SFQ_EMPTY_SLOT;
775817fb15dSEric Dumazet 
77618cb8098SEric Dumazet 	for (i = 0; i < q->maxflows; i++) {
77718c8d82aSEric Dumazet 		slot_queue_init(&q->slots[i]);
7781da177e4SLinus Torvalds 		sfq_link(q, i);
77918c8d82aSEric Dumazet 	}
78023624935SEric Dumazet 	if (q->limit >= 1)
78123624935SEric Dumazet 		sch->flags |= TCQ_F_CAN_BYPASS;
78223624935SEric Dumazet 	else
78323624935SEric Dumazet 		sch->flags &= ~TCQ_F_CAN_BYPASS;
7841da177e4SLinus Torvalds 	return 0;
7851da177e4SLinus Torvalds }
7861da177e4SLinus Torvalds 
sfq_dump(struct Qdisc * sch,struct sk_buff * skb)7871da177e4SLinus Torvalds static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
7881da177e4SLinus Torvalds {
7891da177e4SLinus Torvalds 	struct sfq_sched_data *q = qdisc_priv(sch);
79027a884dcSArnaldo Carvalho de Melo 	unsigned char *b = skb_tail_pointer(skb);
79118cb8098SEric Dumazet 	struct tc_sfq_qopt_v1 opt;
792ddecf0f4SEric Dumazet 	struct red_parms *p = q->red_parms;
7931da177e4SLinus Torvalds 
79418cb8098SEric Dumazet 	memset(&opt, 0, sizeof(opt));
79518cb8098SEric Dumazet 	opt.v0.quantum	= q->quantum;
79618cb8098SEric Dumazet 	opt.v0.perturb_period = q->perturb_period / HZ;
79718cb8098SEric Dumazet 	opt.v0.limit	= q->limit;
79818cb8098SEric Dumazet 	opt.v0.divisor	= q->divisor;
79918cb8098SEric Dumazet 	opt.v0.flows	= q->maxflows;
80018cb8098SEric Dumazet 	opt.depth	= q->maxdepth;
80118cb8098SEric Dumazet 	opt.headdrop	= q->headdrop;
8021da177e4SLinus Torvalds 
803ddecf0f4SEric Dumazet 	if (p) {
804ddecf0f4SEric Dumazet 		opt.qth_min	= p->qth_min >> p->Wlog;
805ddecf0f4SEric Dumazet 		opt.qth_max	= p->qth_max >> p->Wlog;
806ddecf0f4SEric Dumazet 		opt.Wlog	= p->Wlog;
807ddecf0f4SEric Dumazet 		opt.Plog	= p->Plog;
808ddecf0f4SEric Dumazet 		opt.Scell_log	= p->Scell_log;
809ddecf0f4SEric Dumazet 		opt.max_P	= p->max_P;
810ddecf0f4SEric Dumazet 	}
811ddecf0f4SEric Dumazet 	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
812ddecf0f4SEric Dumazet 	opt.flags	= q->flags;
813ddecf0f4SEric Dumazet 
8141b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
8151b34ec43SDavid S. Miller 		goto nla_put_failure;
8161da177e4SLinus Torvalds 
8171da177e4SLinus Torvalds 	return skb->len;
8181da177e4SLinus Torvalds 
8191e90474cSPatrick McHardy nla_put_failure:
820dc5fc579SArnaldo Carvalho de Melo 	nlmsg_trim(skb, b);
8211da177e4SLinus Torvalds 	return -1;
8221da177e4SLinus Torvalds }
8231da177e4SLinus Torvalds 
sfq_leaf(struct Qdisc * sch,unsigned long arg)82441065fbaSJarek Poplawski static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
82541065fbaSJarek Poplawski {
82641065fbaSJarek Poplawski 	return NULL;
82741065fbaSJarek Poplawski }
82841065fbaSJarek Poplawski 
sfq_find(struct Qdisc * sch,u32 classid)829143976ceSWANG Cong static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
8307d2681a6SPatrick McHardy {
8317d2681a6SPatrick McHardy 	return 0;
8327d2681a6SPatrick McHardy }
8337d2681a6SPatrick McHardy 
sfq_bind(struct Qdisc * sch,unsigned long parent,u32 classid)834eb4a5527SJarek Poplawski static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
835eb4a5527SJarek Poplawski 			      u32 classid)
836eb4a5527SJarek Poplawski {
837eb4a5527SJarek Poplawski 	return 0;
838eb4a5527SJarek Poplawski }
839eb4a5527SJarek Poplawski 
sfq_unbind(struct Qdisc * q,unsigned long cl)840143976ceSWANG Cong static void sfq_unbind(struct Qdisc *q, unsigned long cl)
841da7115d9SJarek Poplawski {
842da7115d9SJarek Poplawski }
843da7115d9SJarek Poplawski 
sfq_tcf_block(struct Qdisc * sch,unsigned long cl,struct netlink_ext_ack * extack)844cbaacc4eSAlexander Aring static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
845cbaacc4eSAlexander Aring 				       struct netlink_ext_ack *extack)
8467d2681a6SPatrick McHardy {
8477d2681a6SPatrick McHardy 	struct sfq_sched_data *q = qdisc_priv(sch);
8487d2681a6SPatrick McHardy 
8497d2681a6SPatrick McHardy 	if (cl)
8507d2681a6SPatrick McHardy 		return NULL;
8516529eabaSJiri Pirko 	return q->block;
8527d2681a6SPatrick McHardy }
8537d2681a6SPatrick McHardy 
sfq_dump_class(struct Qdisc * sch,unsigned long cl,struct sk_buff * skb,struct tcmsg * tcm)85494de78d1SPatrick McHardy static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
85594de78d1SPatrick McHardy 			  struct sk_buff *skb, struct tcmsg *tcm)
85694de78d1SPatrick McHardy {
85794de78d1SPatrick McHardy 	tcm->tcm_handle |= TC_H_MIN(cl);
85894de78d1SPatrick McHardy 	return 0;
85994de78d1SPatrick McHardy }
86094de78d1SPatrick McHardy 
sfq_dump_class_stats(struct Qdisc * sch,unsigned long cl,struct gnet_dump * d)86194de78d1SPatrick McHardy static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
86294de78d1SPatrick McHardy 				struct gnet_dump *d)
86394de78d1SPatrick McHardy {
86494de78d1SPatrick McHardy 	struct sfq_sched_data *q = qdisc_priv(sch);
865ee09b3c1SEric Dumazet 	sfq_index idx = q->ht[cl - 1];
866ee09b3c1SEric Dumazet 	struct gnet_stats_queue qs = { 0 };
867ee09b3c1SEric Dumazet 	struct tc_sfq_xstats xstats = { 0 };
868c4266263SEric Dumazet 
869ee09b3c1SEric Dumazet 	if (idx != SFQ_EMPTY_SLOT) {
870ee09b3c1SEric Dumazet 		const struct sfq_slot *slot = &q->slots[idx];
871ee09b3c1SEric Dumazet 
87258ae7465SEric Dumazet 		xstats.allot = slot->allot;
873ee09b3c1SEric Dumazet 		qs.qlen = slot->qlen;
874ddecf0f4SEric Dumazet 		qs.backlog = slot->backlog;
875ee09b3c1SEric Dumazet 	}
876b0ab6f92SJohn Fastabend 	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
87794de78d1SPatrick McHardy 		return -1;
87894de78d1SPatrick McHardy 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
87994de78d1SPatrick McHardy }
88094de78d1SPatrick McHardy 
sfq_walk(struct Qdisc * sch,struct qdisc_walker * arg)8817d2681a6SPatrick McHardy static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
8827d2681a6SPatrick McHardy {
88394de78d1SPatrick McHardy 	struct sfq_sched_data *q = qdisc_priv(sch);
88494de78d1SPatrick McHardy 	unsigned int i;
88594de78d1SPatrick McHardy 
88694de78d1SPatrick McHardy 	if (arg->stop)
8877d2681a6SPatrick McHardy 		return;
88894de78d1SPatrick McHardy 
889817fb15dSEric Dumazet 	for (i = 0; i < q->divisor; i++) {
890e046fa89SZhengchao Shao 		if (q->ht[i] == SFQ_EMPTY_SLOT) {
89194de78d1SPatrick McHardy 			arg->count++;
89294de78d1SPatrick McHardy 			continue;
89394de78d1SPatrick McHardy 		}
894e046fa89SZhengchao Shao 		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
89594de78d1SPatrick McHardy 			break;
89694de78d1SPatrick McHardy 	}
8977d2681a6SPatrick McHardy }
8987d2681a6SPatrick McHardy 
8997d2681a6SPatrick McHardy static const struct Qdisc_class_ops sfq_class_ops = {
90041065fbaSJarek Poplawski 	.leaf		=	sfq_leaf,
901143976ceSWANG Cong 	.find		=	sfq_find,
9026529eabaSJiri Pirko 	.tcf_block	=	sfq_tcf_block,
903eb4a5527SJarek Poplawski 	.bind_tcf	=	sfq_bind,
904143976ceSWANG Cong 	.unbind_tcf	=	sfq_unbind,
90594de78d1SPatrick McHardy 	.dump		=	sfq_dump_class,
90694de78d1SPatrick McHardy 	.dump_stats	=	sfq_dump_class_stats,
9077d2681a6SPatrick McHardy 	.walk		=	sfq_walk,
9087d2681a6SPatrick McHardy };
9097d2681a6SPatrick McHardy 
91020fea08bSEric Dumazet static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
9117d2681a6SPatrick McHardy 	.cl_ops		=	&sfq_class_ops,
9121da177e4SLinus Torvalds 	.id		=	"sfq",
9131da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct sfq_sched_data),
9141da177e4SLinus Torvalds 	.enqueue	=	sfq_enqueue,
9151da177e4SLinus Torvalds 	.dequeue	=	sfq_dequeue,
91607bd8df5SEric Dumazet 	.peek		=	qdisc_peek_dequeued,
9171da177e4SLinus Torvalds 	.init		=	sfq_init,
9181da177e4SLinus Torvalds 	.reset		=	sfq_reset,
9191da177e4SLinus Torvalds 	.destroy	=	sfq_destroy,
9201da177e4SLinus Torvalds 	.change		=	NULL,
9211da177e4SLinus Torvalds 	.dump		=	sfq_dump,
9221da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
9231da177e4SLinus Torvalds };
9241da177e4SLinus Torvalds 
sfq_module_init(void)9251da177e4SLinus Torvalds static int __init sfq_module_init(void)
9261da177e4SLinus Torvalds {
9271da177e4SLinus Torvalds 	return register_qdisc(&sfq_qdisc_ops);
9281da177e4SLinus Torvalds }
sfq_module_exit(void)9291da177e4SLinus Torvalds static void __exit sfq_module_exit(void)
9301da177e4SLinus Torvalds {
9311da177e4SLinus Torvalds 	unregister_qdisc(&sfq_qdisc_ops);
9321da177e4SLinus Torvalds }
9331da177e4SLinus Torvalds module_init(sfq_module_init)
9341da177e4SLinus Torvalds module_exit(sfq_module_exit)
9351da177e4SLinus Torvalds MODULE_LICENSE("GPL");
936