12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline.
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
61da177e4SLinus Torvalds */
71da177e4SLinus Torvalds
81da177e4SLinus Torvalds #include <linux/module.h>
91da177e4SLinus Torvalds #include <linux/types.h>
101da177e4SLinus Torvalds #include <linux/kernel.h>
111da177e4SLinus Torvalds #include <linux/jiffies.h>
121da177e4SLinus Torvalds #include <linux/string.h>
131da177e4SLinus Torvalds #include <linux/in.h>
141da177e4SLinus Torvalds #include <linux/errno.h>
151da177e4SLinus Torvalds #include <linux/init.h>
160ba48053SPatrick McHardy #include <linux/skbuff.h>
1755667441SEric Dumazet #include <linux/siphash.h>
185a0e3ad6STejun Heo #include <linux/slab.h>
19817fb15dSEric Dumazet #include <linux/vmalloc.h>
20dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
211da177e4SLinus Torvalds #include <net/pkt_sched.h>
22cf1facdaSJiri Pirko #include <net/pkt_cls.h>
23ddecf0f4SEric Dumazet #include <net/red.h>
241da177e4SLinus Torvalds
251da177e4SLinus Torvalds
261da177e4SLinus Torvalds /* Stochastic Fairness Queuing algorithm.
271da177e4SLinus Torvalds =======================================
281da177e4SLinus Torvalds
291da177e4SLinus Torvalds Source:
301da177e4SLinus Torvalds Paul E. McKenney "Stochastic Fairness Queuing",
311da177e4SLinus Torvalds IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
321da177e4SLinus Torvalds
331da177e4SLinus Torvalds Paul E. McKenney "Stochastic Fairness Queuing",
341da177e4SLinus Torvalds "Interworking: Research and Experience", v.2, 1991, p.113-131.
351da177e4SLinus Torvalds
361da177e4SLinus Torvalds
371da177e4SLinus Torvalds See also:
381da177e4SLinus Torvalds M. Shreedhar and George Varghese "Efficient Fair
391da177e4SLinus Torvalds Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
401da177e4SLinus Torvalds
411da177e4SLinus Torvalds
421da177e4SLinus Torvalds This is not the thing that is usually called (W)FQ nowadays.
431da177e4SLinus Torvalds It does not use any timestamp mechanism, but instead
441da177e4SLinus Torvalds processes queues in round-robin order.
451da177e4SLinus Torvalds
461da177e4SLinus Torvalds ADVANTAGE:
471da177e4SLinus Torvalds
481da177e4SLinus Torvalds - It is very cheap. Both CPU and memory requirements are minimal.
491da177e4SLinus Torvalds
501da177e4SLinus Torvalds DRAWBACKS:
511da177e4SLinus Torvalds
521da177e4SLinus Torvalds - "Stochastic" -> It is not 100% fair.
531da177e4SLinus Torvalds When hash collisions occur, several flows are considered as one.
541da177e4SLinus Torvalds
551da177e4SLinus Torvalds - "Round-robin" -> It introduces larger delays than virtual clock
561da177e4SLinus Torvalds based schemes, and should not be used for isolating interactive
571da177e4SLinus Torvalds traffic from non-interactive. It means, that this scheduler
581da177e4SLinus Torvalds should be used as leaf of CBQ or P3, which put interactive traffic
591da177e4SLinus Torvalds to higher priority band.
601da177e4SLinus Torvalds
611da177e4SLinus Torvalds We still need true WFQ for top level CSZ, but using WFQ
621da177e4SLinus Torvalds for the best effort traffic is absolutely pointless:
631da177e4SLinus Torvalds SFQ is superior for this purpose.
641da177e4SLinus Torvalds
651da177e4SLinus Torvalds IMPLEMENTATION:
6618cb8098SEric Dumazet This implementation limits :
6718cb8098SEric Dumazet - maximal queue length per flow to 127 packets.
6818cb8098SEric Dumazet - max mtu to 2^18-1;
6918cb8098SEric Dumazet - max 65408 flows,
7018cb8098SEric Dumazet - number of hash buckets to 65536.
711da177e4SLinus Torvalds
721da177e4SLinus Torvalds It is easy to increase these values, but not in flight. */
731da177e4SLinus Torvalds
7418cb8098SEric Dumazet #define SFQ_MAX_DEPTH 127 /* max number of packets per flow */
7518cb8098SEric Dumazet #define SFQ_DEFAULT_FLOWS 128
7618cb8098SEric Dumazet #define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
7718cb8098SEric Dumazet #define SFQ_EMPTY_SLOT 0xffff
78817fb15dSEric Dumazet #define SFQ_DEFAULT_HASH_DIVISOR 1024
79817fb15dSEric Dumazet
8018cb8098SEric Dumazet /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
8118cb8098SEric Dumazet typedef u16 sfq_index;
821da177e4SLinus Torvalds
83eda83e3bSEric Dumazet /*
84eda83e3bSEric Dumazet * We dont use pointers to save space.
8518cb8098SEric Dumazet * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
8618cb8098SEric Dumazet * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
87eda83e3bSEric Dumazet * are 'pointers' to dep[] array
88eda83e3bSEric Dumazet */
89cc7ec456SEric Dumazet struct sfq_head {
901da177e4SLinus Torvalds sfq_index next;
911da177e4SLinus Torvalds sfq_index prev;
921da177e4SLinus Torvalds };
931da177e4SLinus Torvalds
94eda83e3bSEric Dumazet struct sfq_slot {
95eda83e3bSEric Dumazet struct sk_buff *skblist_next;
96eda83e3bSEric Dumazet struct sk_buff *skblist_prev;
97eda83e3bSEric Dumazet sfq_index qlen; /* number of skbs in skblist */
9818cb8098SEric Dumazet sfq_index next; /* next slot in sfq RR chain */
99eda83e3bSEric Dumazet struct sfq_head dep; /* anchor in dep[] chains */
100eda83e3bSEric Dumazet unsigned short hash; /* hash value (index in ht[]) */
10158ae7465SEric Dumazet int allot; /* credit for this slot */
102ddecf0f4SEric Dumazet
103ddecf0f4SEric Dumazet unsigned int backlog;
104ddecf0f4SEric Dumazet struct red_vars vars;
105eda83e3bSEric Dumazet };
106eda83e3bSEric Dumazet
107cc7ec456SEric Dumazet struct sfq_sched_data {
10818cb8098SEric Dumazet /* frequently used fields */
10918cb8098SEric Dumazet int limit; /* limit of total number of packets in this qdisc */
110817fb15dSEric Dumazet unsigned int divisor; /* number of slots in hash table */
111ddecf0f4SEric Dumazet u8 headdrop;
112ddecf0f4SEric Dumazet u8 maxdepth; /* limit of packets per flow */
11318cb8098SEric Dumazet
11455667441SEric Dumazet siphash_key_t perturbation;
115ddecf0f4SEric Dumazet u8 cur_depth; /* depth of longest slot */
116ddecf0f4SEric Dumazet u8 flags;
11725d8c0d5SJohn Fastabend struct tcf_proto __rcu *filter_list;
1186529eabaSJiri Pirko struct tcf_block *block;
11918cb8098SEric Dumazet sfq_index *ht; /* Hash table ('divisor' slots) */
12018cb8098SEric Dumazet struct sfq_slot *slots; /* Flows table ('maxflows' entries) */
12118cb8098SEric Dumazet
122ddecf0f4SEric Dumazet struct red_parms *red_parms;
123ddecf0f4SEric Dumazet struct tc_sfqred_stats stats;
124ddecf0f4SEric Dumazet struct sfq_slot *tail; /* current slot in round */
125ddecf0f4SEric Dumazet
12618cb8098SEric Dumazet struct sfq_head dep[SFQ_MAX_DEPTH + 1];
12718cb8098SEric Dumazet /* Linked lists of slots, indexed by depth
12818cb8098SEric Dumazet * dep[0] : list of unused flows
12918cb8098SEric Dumazet * dep[1] : list of flows with 1 packet
13018cb8098SEric Dumazet * dep[X] : list of flows with X packets
13118cb8098SEric Dumazet */
13218cb8098SEric Dumazet
133ddecf0f4SEric Dumazet unsigned int maxflows; /* number of flows in flows array */
13418cb8098SEric Dumazet int perturb_period;
13518cb8098SEric Dumazet unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
13618cb8098SEric Dumazet struct timer_list perturb_timer;
137cdeabbb8SKees Cook struct Qdisc *sch;
1381da177e4SLinus Torvalds };
1391da177e4SLinus Torvalds
140eda83e3bSEric Dumazet /*
141eda83e3bSEric Dumazet * sfq_head are either in a sfq_slot or in dep[] array
142eda83e3bSEric Dumazet */
sfq_dep_head(struct sfq_sched_data * q,sfq_index val)143eda83e3bSEric Dumazet static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
144eda83e3bSEric Dumazet {
14518cb8098SEric Dumazet if (val < SFQ_MAX_FLOWS)
146eda83e3bSEric Dumazet return &q->slots[val].dep;
14718cb8098SEric Dumazet return &q->dep[val - SFQ_MAX_FLOWS];
148eda83e3bSEric Dumazet }
149eda83e3bSEric Dumazet
sfq_hash(const struct sfq_sched_data * q,const struct sk_buff * skb)15011fca931SEric Dumazet static unsigned int sfq_hash(const struct sfq_sched_data *q,
15111fca931SEric Dumazet const struct sk_buff *skb)
1521da177e4SLinus Torvalds {
15355667441SEric Dumazet return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
1541da177e4SLinus Torvalds }
1551da177e4SLinus Torvalds
sfq_classify(struct sk_buff * skb,struct Qdisc * sch,int * qerr)1567d2681a6SPatrick McHardy static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
1577d2681a6SPatrick McHardy int *qerr)
1587d2681a6SPatrick McHardy {
1597d2681a6SPatrick McHardy struct sfq_sched_data *q = qdisc_priv(sch);
1607d2681a6SPatrick McHardy struct tcf_result res;
16125d8c0d5SJohn Fastabend struct tcf_proto *fl;
1627d2681a6SPatrick McHardy int result;
1637d2681a6SPatrick McHardy
1647d2681a6SPatrick McHardy if (TC_H_MAJ(skb->priority) == sch->handle &&
1657d2681a6SPatrick McHardy TC_H_MIN(skb->priority) > 0 &&
166817fb15dSEric Dumazet TC_H_MIN(skb->priority) <= q->divisor)
1677d2681a6SPatrick McHardy return TC_H_MIN(skb->priority);
1687d2681a6SPatrick McHardy
16925d8c0d5SJohn Fastabend fl = rcu_dereference_bh(q->filter_list);
170ada1dba0STom Herbert if (!fl)
1717d2681a6SPatrick McHardy return sfq_hash(q, skb) + 1;
1727d2681a6SPatrick McHardy
173c27f339aSJarek Poplawski *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1743aa26055SDavide Caratti result = tcf_classify(skb, NULL, fl, &res, false);
1757d2681a6SPatrick McHardy if (result >= 0) {
1767d2681a6SPatrick McHardy #ifdef CONFIG_NET_CLS_ACT
1777d2681a6SPatrick McHardy switch (result) {
1787d2681a6SPatrick McHardy case TC_ACT_STOLEN:
1797d2681a6SPatrick McHardy case TC_ACT_QUEUED:
180e25ea21fSJiri Pirko case TC_ACT_TRAP:
181378a2f09SJarek Poplawski *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
182964201deSGustavo A. R. Silva fallthrough;
1837d2681a6SPatrick McHardy case TC_ACT_SHOT:
1847d2681a6SPatrick McHardy return 0;
1857d2681a6SPatrick McHardy }
1867d2681a6SPatrick McHardy #endif
187817fb15dSEric Dumazet if (TC_H_MIN(res.classid) <= q->divisor)
1887d2681a6SPatrick McHardy return TC_H_MIN(res.classid);
1897d2681a6SPatrick McHardy }
1907d2681a6SPatrick McHardy return 0;
1917d2681a6SPatrick McHardy }
1927d2681a6SPatrick McHardy
193eda83e3bSEric Dumazet /*
19418cb8098SEric Dumazet * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
195eda83e3bSEric Dumazet */
sfq_link(struct sfq_sched_data * q,sfq_index x)1961da177e4SLinus Torvalds static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
1971da177e4SLinus Torvalds {
1981da177e4SLinus Torvalds sfq_index p, n;
19918cb8098SEric Dumazet struct sfq_slot *slot = &q->slots[x];
20018cb8098SEric Dumazet int qlen = slot->qlen;
2011da177e4SLinus Torvalds
20218cb8098SEric Dumazet p = qlen + SFQ_MAX_FLOWS;
203eda83e3bSEric Dumazet n = q->dep[qlen].next;
204eda83e3bSEric Dumazet
20518cb8098SEric Dumazet slot->dep.next = n;
20618cb8098SEric Dumazet slot->dep.prev = p;
207eda83e3bSEric Dumazet
208eda83e3bSEric Dumazet q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
209eda83e3bSEric Dumazet sfq_dep_head(q, n)->prev = x;
2101da177e4SLinus Torvalds }
2111da177e4SLinus Torvalds
212eda83e3bSEric Dumazet #define sfq_unlink(q, x, n, p) \
213fa08943bSYang Yingliang do { \
214eda83e3bSEric Dumazet n = q->slots[x].dep.next; \
215eda83e3bSEric Dumazet p = q->slots[x].dep.prev; \
216eda83e3bSEric Dumazet sfq_dep_head(q, p)->next = n; \
217fa08943bSYang Yingliang sfq_dep_head(q, n)->prev = p; \
218fa08943bSYang Yingliang } while (0)
219eda83e3bSEric Dumazet
220eda83e3bSEric Dumazet
sfq_dec(struct sfq_sched_data * q,sfq_index x)2211da177e4SLinus Torvalds static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
2221da177e4SLinus Torvalds {
2231da177e4SLinus Torvalds sfq_index p, n;
224eda83e3bSEric Dumazet int d;
2251da177e4SLinus Torvalds
226eda83e3bSEric Dumazet sfq_unlink(q, x, n, p);
2271da177e4SLinus Torvalds
228eda83e3bSEric Dumazet d = q->slots[x].qlen--;
229eda83e3bSEric Dumazet if (n == p && q->cur_depth == d)
230eda83e3bSEric Dumazet q->cur_depth--;
2311da177e4SLinus Torvalds sfq_link(q, x);
2321da177e4SLinus Torvalds }
2331da177e4SLinus Torvalds
sfq_inc(struct sfq_sched_data * q,sfq_index x)2341da177e4SLinus Torvalds static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
2351da177e4SLinus Torvalds {
2361da177e4SLinus Torvalds sfq_index p, n;
2371da177e4SLinus Torvalds int d;
2381da177e4SLinus Torvalds
239eda83e3bSEric Dumazet sfq_unlink(q, x, n, p);
2401da177e4SLinus Torvalds
241eda83e3bSEric Dumazet d = ++q->slots[x].qlen;
242eda83e3bSEric Dumazet if (q->cur_depth < d)
243eda83e3bSEric Dumazet q->cur_depth = d;
2441da177e4SLinus Torvalds sfq_link(q, x);
2451da177e4SLinus Torvalds }
2461da177e4SLinus Torvalds
247eda83e3bSEric Dumazet /* helper functions : might be changed when/if skb use a standard list_head */
248eda83e3bSEric Dumazet
249eda83e3bSEric Dumazet /* remove one skb from tail of slot queue */
slot_dequeue_tail(struct sfq_slot * slot)250eda83e3bSEric Dumazet static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
251eda83e3bSEric Dumazet {
252eda83e3bSEric Dumazet struct sk_buff *skb = slot->skblist_prev;
253eda83e3bSEric Dumazet
254eda83e3bSEric Dumazet slot->skblist_prev = skb->prev;
255ee09b3c1SEric Dumazet skb->prev->next = (struct sk_buff *)slot;
256eda83e3bSEric Dumazet skb->next = skb->prev = NULL;
257eda83e3bSEric Dumazet return skb;
258eda83e3bSEric Dumazet }
259eda83e3bSEric Dumazet
260eda83e3bSEric Dumazet /* remove one skb from head of slot queue */
slot_dequeue_head(struct sfq_slot * slot)261eda83e3bSEric Dumazet static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
262eda83e3bSEric Dumazet {
263eda83e3bSEric Dumazet struct sk_buff *skb = slot->skblist_next;
264eda83e3bSEric Dumazet
265eda83e3bSEric Dumazet slot->skblist_next = skb->next;
26618c8d82aSEric Dumazet skb->next->prev = (struct sk_buff *)slot;
267eda83e3bSEric Dumazet skb->next = skb->prev = NULL;
268eda83e3bSEric Dumazet return skb;
269eda83e3bSEric Dumazet }
270eda83e3bSEric Dumazet
slot_queue_init(struct sfq_slot * slot)271eda83e3bSEric Dumazet static inline void slot_queue_init(struct sfq_slot *slot)
272eda83e3bSEric Dumazet {
27318cb8098SEric Dumazet memset(slot, 0, sizeof(*slot));
274eda83e3bSEric Dumazet slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
275eda83e3bSEric Dumazet }
276eda83e3bSEric Dumazet
277eda83e3bSEric Dumazet /* add skb to slot queue (tail add) */
slot_queue_add(struct sfq_slot * slot,struct sk_buff * skb)278eda83e3bSEric Dumazet static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
279eda83e3bSEric Dumazet {
280eda83e3bSEric Dumazet skb->prev = slot->skblist_prev;
281eda83e3bSEric Dumazet skb->next = (struct sk_buff *)slot;
282eda83e3bSEric Dumazet slot->skblist_prev->next = skb;
283eda83e3bSEric Dumazet slot->skblist_prev = skb;
284eda83e3bSEric Dumazet }
285eda83e3bSEric Dumazet
sfq_drop(struct Qdisc * sch,struct sk_buff ** to_free)286f9ab7425SGao Feng static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
2871da177e4SLinus Torvalds {
2881da177e4SLinus Torvalds struct sfq_sched_data *q = qdisc_priv(sch);
289eda83e3bSEric Dumazet sfq_index x, d = q->cur_depth;
2901da177e4SLinus Torvalds struct sk_buff *skb;
2911da177e4SLinus Torvalds unsigned int len;
292eda83e3bSEric Dumazet struct sfq_slot *slot;
2931da177e4SLinus Torvalds
294eda83e3bSEric Dumazet /* Queue is full! Find the longest slot and drop tail packet from it */
2951da177e4SLinus Torvalds if (d > 1) {
296eda83e3bSEric Dumazet x = q->dep[d].next;
297eda83e3bSEric Dumazet slot = &q->slots[x];
298eda83e3bSEric Dumazet drop:
29918cb8098SEric Dumazet skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
3000abf77e5SJussi Kivilinna len = qdisc_pkt_len(skb);
301ddecf0f4SEric Dumazet slot->backlog -= len;
3021da177e4SLinus Torvalds sfq_dec(q, x);
3031da177e4SLinus Torvalds sch->q.qlen--;
30425331d6cSJohn Fastabend qdisc_qstats_backlog_dec(sch, skb);
305f9ab7425SGao Feng qdisc_drop(skb, sch, to_free);
3061da177e4SLinus Torvalds return len;
3071da177e4SLinus Torvalds }
3081da177e4SLinus Torvalds
3091da177e4SLinus Torvalds if (d == 1) {
3101da177e4SLinus Torvalds /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
311eda83e3bSEric Dumazet x = q->tail->next;
312eda83e3bSEric Dumazet slot = &q->slots[x];
313eda83e3bSEric Dumazet q->tail->next = slot->next;
314eda83e3bSEric Dumazet q->ht[slot->hash] = SFQ_EMPTY_SLOT;
315eda83e3bSEric Dumazet goto drop;
3161da177e4SLinus Torvalds }
3171da177e4SLinus Torvalds
3181da177e4SLinus Torvalds return 0;
3191da177e4SLinus Torvalds }
3201da177e4SLinus Torvalds
321ddecf0f4SEric Dumazet /* Is ECN parameter configured */
sfq_prob_mark(const struct sfq_sched_data * q)322ddecf0f4SEric Dumazet static int sfq_prob_mark(const struct sfq_sched_data *q)
323ddecf0f4SEric Dumazet {
324ddecf0f4SEric Dumazet return q->flags & TC_RED_ECN;
325ddecf0f4SEric Dumazet }
326ddecf0f4SEric Dumazet
327ddecf0f4SEric Dumazet /* Should packets over max threshold just be marked */
sfq_hard_mark(const struct sfq_sched_data * q)328ddecf0f4SEric Dumazet static int sfq_hard_mark(const struct sfq_sched_data *q)
329ddecf0f4SEric Dumazet {
330ddecf0f4SEric Dumazet return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
331ddecf0f4SEric Dumazet }
332ddecf0f4SEric Dumazet
sfq_headdrop(const struct sfq_sched_data * q)333ddecf0f4SEric Dumazet static int sfq_headdrop(const struct sfq_sched_data *q)
334ddecf0f4SEric Dumazet {
335ddecf0f4SEric Dumazet return q->headdrop;
336ddecf0f4SEric Dumazet }
337ddecf0f4SEric Dumazet
3381da177e4SLinus Torvalds static int
sfq_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)339520ac30fSEric Dumazet sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
3401da177e4SLinus Torvalds {
3411da177e4SLinus Torvalds struct sfq_sched_data *q = qdisc_priv(sch);
3422ccccf5fSWANG Cong unsigned int hash, dropped;
3438efa8854SEric Dumazet sfq_index x, qlen;
344eda83e3bSEric Dumazet struct sfq_slot *slot;
3453f649ab7SKees Cook int ret;
346ddecf0f4SEric Dumazet struct sk_buff *head;
347ddecf0f4SEric Dumazet int delta;
3487d2681a6SPatrick McHardy
3497d2681a6SPatrick McHardy hash = sfq_classify(skb, sch, &ret);
3507d2681a6SPatrick McHardy if (hash == 0) {
351c27f339aSJarek Poplawski if (ret & __NET_XMIT_BYPASS)
35225331d6cSJohn Fastabend qdisc_qstats_drop(sch);
353f9ab7425SGao Feng __qdisc_drop(skb, to_free);
3547d2681a6SPatrick McHardy return ret;
3557d2681a6SPatrick McHardy }
3567d2681a6SPatrick McHardy hash--;
3571da177e4SLinus Torvalds
3581da177e4SLinus Torvalds x = q->ht[hash];
359eda83e3bSEric Dumazet slot = &q->slots[x];
360eda83e3bSEric Dumazet if (x == SFQ_EMPTY_SLOT) {
361eda83e3bSEric Dumazet x = q->dep[0].next; /* get a free slot */
36218cb8098SEric Dumazet if (x >= SFQ_MAX_FLOWS)
363520ac30fSEric Dumazet return qdisc_drop(skb, sch, to_free);
364eda83e3bSEric Dumazet q->ht[hash] = x;
365eda83e3bSEric Dumazet slot = &q->slots[x];
366eda83e3bSEric Dumazet slot->hash = hash;
367ddecf0f4SEric Dumazet slot->backlog = 0; /* should already be 0 anyway... */
368ddecf0f4SEric Dumazet red_set_vars(&slot->vars);
369ddecf0f4SEric Dumazet goto enqueue;
370ddecf0f4SEric Dumazet }
371ddecf0f4SEric Dumazet if (q->red_parms) {
372ddecf0f4SEric Dumazet slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
373ddecf0f4SEric Dumazet &slot->vars,
374ddecf0f4SEric Dumazet slot->backlog);
375ddecf0f4SEric Dumazet switch (red_action(q->red_parms,
376ddecf0f4SEric Dumazet &slot->vars,
377ddecf0f4SEric Dumazet slot->vars.qavg)) {
378ddecf0f4SEric Dumazet case RED_DONT_MARK:
379ddecf0f4SEric Dumazet break;
380ddecf0f4SEric Dumazet
381ddecf0f4SEric Dumazet case RED_PROB_MARK:
38225331d6cSJohn Fastabend qdisc_qstats_overlimit(sch);
383ddecf0f4SEric Dumazet if (sfq_prob_mark(q)) {
384ddecf0f4SEric Dumazet /* We know we have at least one packet in queue */
385ddecf0f4SEric Dumazet if (sfq_headdrop(q) &&
386ddecf0f4SEric Dumazet INET_ECN_set_ce(slot->skblist_next)) {
387ddecf0f4SEric Dumazet q->stats.prob_mark_head++;
388ddecf0f4SEric Dumazet break;
389ddecf0f4SEric Dumazet }
390ddecf0f4SEric Dumazet if (INET_ECN_set_ce(skb)) {
391ddecf0f4SEric Dumazet q->stats.prob_mark++;
392ddecf0f4SEric Dumazet break;
393ddecf0f4SEric Dumazet }
394ddecf0f4SEric Dumazet }
395ddecf0f4SEric Dumazet q->stats.prob_drop++;
396ddecf0f4SEric Dumazet goto congestion_drop;
397ddecf0f4SEric Dumazet
398ddecf0f4SEric Dumazet case RED_HARD_MARK:
39925331d6cSJohn Fastabend qdisc_qstats_overlimit(sch);
400ddecf0f4SEric Dumazet if (sfq_hard_mark(q)) {
401ddecf0f4SEric Dumazet /* We know we have at least one packet in queue */
402ddecf0f4SEric Dumazet if (sfq_headdrop(q) &&
403ddecf0f4SEric Dumazet INET_ECN_set_ce(slot->skblist_next)) {
404ddecf0f4SEric Dumazet q->stats.forced_mark_head++;
405ddecf0f4SEric Dumazet break;
406ddecf0f4SEric Dumazet }
407ddecf0f4SEric Dumazet if (INET_ECN_set_ce(skb)) {
408ddecf0f4SEric Dumazet q->stats.forced_mark++;
409ddecf0f4SEric Dumazet break;
410ddecf0f4SEric Dumazet }
411ddecf0f4SEric Dumazet }
412ddecf0f4SEric Dumazet q->stats.forced_drop++;
413ddecf0f4SEric Dumazet goto congestion_drop;
414ddecf0f4SEric Dumazet }
4151da177e4SLinus Torvalds }
4166f9e98f7SStephen Hemminger
41718cb8098SEric Dumazet if (slot->qlen >= q->maxdepth) {
418ddecf0f4SEric Dumazet congestion_drop:
419ddecf0f4SEric Dumazet if (!sfq_headdrop(q))
420520ac30fSEric Dumazet return qdisc_drop(skb, sch, to_free);
42132740ddcSAlexey Kuznetsov
422ddecf0f4SEric Dumazet /* We know we have at least one packet in queue */
42318cb8098SEric Dumazet head = slot_dequeue_head(slot);
424ddecf0f4SEric Dumazet delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
425ddecf0f4SEric Dumazet sch->qstats.backlog -= delta;
426ddecf0f4SEric Dumazet slot->backlog -= delta;
427520ac30fSEric Dumazet qdisc_drop(head, sch, to_free);
42818cb8098SEric Dumazet
42918cb8098SEric Dumazet slot_queue_add(slot, skb);
430325d5dc3SKonstantin Khlebnikov qdisc_tree_reduce_backlog(sch, 0, delta);
43118cb8098SEric Dumazet return NET_XMIT_CN;
43218cb8098SEric Dumazet }
43318cb8098SEric Dumazet
434ddecf0f4SEric Dumazet enqueue:
43525331d6cSJohn Fastabend qdisc_qstats_backlog_inc(sch, skb);
436ddecf0f4SEric Dumazet slot->backlog += qdisc_pkt_len(skb);
437eda83e3bSEric Dumazet slot_queue_add(slot, skb);
4381da177e4SLinus Torvalds sfq_inc(q, x);
439eda83e3bSEric Dumazet if (slot->qlen == 1) { /* The flow is new */
440eda83e3bSEric Dumazet if (q->tail == NULL) { /* It is the first flow */
441eda83e3bSEric Dumazet slot->next = x;
4421da177e4SLinus Torvalds } else {
443eda83e3bSEric Dumazet slot->next = q->tail->next;
444eda83e3bSEric Dumazet q->tail->next = x;
4451da177e4SLinus Torvalds }
446cc34eb67SEric Dumazet /* We put this flow at the end of our flow list.
447cc34eb67SEric Dumazet * This might sound unfair for a new flow to wait after old ones,
448cc34eb67SEric Dumazet * but we could endup servicing new flows only, and freeze old ones.
449cc34eb67SEric Dumazet */
450cc34eb67SEric Dumazet q->tail = slot;
451ddecf0f4SEric Dumazet /* We could use a bigger initial quantum for new flows */
45258ae7465SEric Dumazet slot->allot = q->quantum;
4531da177e4SLinus Torvalds }
4549190b3b3SEric Dumazet if (++sch->q.qlen <= q->limit)
4559871e50eSBen Greear return NET_XMIT_SUCCESS;
4561da177e4SLinus Torvalds
4578efa8854SEric Dumazet qlen = slot->qlen;
458f9ab7425SGao Feng dropped = sfq_drop(sch, to_free);
4598efa8854SEric Dumazet /* Return Congestion Notification only if we dropped a packet
4608efa8854SEric Dumazet * from this flow.
4618efa8854SEric Dumazet */
462325d5dc3SKonstantin Khlebnikov if (qlen != slot->qlen) {
463325d5dc3SKonstantin Khlebnikov qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
464e1738bd9SEric Dumazet return NET_XMIT_CN;
465325d5dc3SKonstantin Khlebnikov }
466e1738bd9SEric Dumazet
467e1738bd9SEric Dumazet /* As we dropped a packet, better let upper stack know this */
4682ccccf5fSWANG Cong qdisc_tree_reduce_backlog(sch, 1, dropped);
469e1738bd9SEric Dumazet return NET_XMIT_SUCCESS;
4701da177e4SLinus Torvalds }
4711da177e4SLinus Torvalds
47248a8f519SPatrick McHardy static struct sk_buff *
sfq_dequeue(struct Qdisc * sch)4731da177e4SLinus Torvalds sfq_dequeue(struct Qdisc *sch)
4741da177e4SLinus Torvalds {
4751da177e4SLinus Torvalds struct sfq_sched_data *q = qdisc_priv(sch);
4761da177e4SLinus Torvalds struct sk_buff *skb;
477aa3e2199SEric Dumazet sfq_index a, next_a;
478eda83e3bSEric Dumazet struct sfq_slot *slot;
4791da177e4SLinus Torvalds
4801da177e4SLinus Torvalds /* No active slots */
481eda83e3bSEric Dumazet if (q->tail == NULL)
4821da177e4SLinus Torvalds return NULL;
4831da177e4SLinus Torvalds
484eeaeb068SEric Dumazet next_slot:
485eda83e3bSEric Dumazet a = q->tail->next;
486eda83e3bSEric Dumazet slot = &q->slots[a];
487eeaeb068SEric Dumazet if (slot->allot <= 0) {
488eeaeb068SEric Dumazet q->tail = slot;
48958ae7465SEric Dumazet slot->allot += q->quantum;
490eeaeb068SEric Dumazet goto next_slot;
491eeaeb068SEric Dumazet }
492eda83e3bSEric Dumazet skb = slot_dequeue_head(slot);
4931da177e4SLinus Torvalds sfq_dec(q, a);
4949190b3b3SEric Dumazet qdisc_bstats_update(sch, skb);
4951da177e4SLinus Torvalds sch->q.qlen--;
49625331d6cSJohn Fastabend qdisc_qstats_backlog_dec(sch, skb);
497ddecf0f4SEric Dumazet slot->backlog -= qdisc_pkt_len(skb);
4981da177e4SLinus Torvalds /* Is the slot empty? */
499eda83e3bSEric Dumazet if (slot->qlen == 0) {
500eda83e3bSEric Dumazet q->ht[slot->hash] = SFQ_EMPTY_SLOT;
501eda83e3bSEric Dumazet next_a = slot->next;
502aa3e2199SEric Dumazet if (a == next_a) {
503eda83e3bSEric Dumazet q->tail = NULL; /* no more active slots */
5041da177e4SLinus Torvalds return skb;
5051da177e4SLinus Torvalds }
506eda83e3bSEric Dumazet q->tail->next = next_a;
507eeaeb068SEric Dumazet } else {
50858ae7465SEric Dumazet slot->allot -= qdisc_pkt_len(skb);
5091da177e4SLinus Torvalds }
5101da177e4SLinus Torvalds return skb;
5111da177e4SLinus Torvalds }
5121da177e4SLinus Torvalds
5131da177e4SLinus Torvalds static void
sfq_reset(struct Qdisc * sch)5141da177e4SLinus Torvalds sfq_reset(struct Qdisc *sch)
5151da177e4SLinus Torvalds {
5161da177e4SLinus Torvalds struct sk_buff *skb;
5171da177e4SLinus Torvalds
5181da177e4SLinus Torvalds while ((skb = sfq_dequeue(sch)) != NULL)
519fea02478SEric Dumazet rtnl_kfree_skbs(skb, skb);
5201da177e4SLinus Torvalds }
5211da177e4SLinus Torvalds
522225d9b89SEric Dumazet /*
523225d9b89SEric Dumazet * When q->perturbation is changed, we rehash all queued skbs
524225d9b89SEric Dumazet * to avoid OOO (Out Of Order) effects.
525225d9b89SEric Dumazet * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
526225d9b89SEric Dumazet * counters.
527225d9b89SEric Dumazet */
sfq_rehash(struct Qdisc * sch)52818cb8098SEric Dumazet static void sfq_rehash(struct Qdisc *sch)
529225d9b89SEric Dumazet {
53018cb8098SEric Dumazet struct sfq_sched_data *q = qdisc_priv(sch);
531225d9b89SEric Dumazet struct sk_buff *skb;
532225d9b89SEric Dumazet int i;
533225d9b89SEric Dumazet struct sfq_slot *slot;
534225d9b89SEric Dumazet struct sk_buff_head list;
53518cb8098SEric Dumazet int dropped = 0;
5362ccccf5fSWANG Cong unsigned int drop_len = 0;
537225d9b89SEric Dumazet
538225d9b89SEric Dumazet __skb_queue_head_init(&list);
539225d9b89SEric Dumazet
54018cb8098SEric Dumazet for (i = 0; i < q->maxflows; i++) {
541225d9b89SEric Dumazet slot = &q->slots[i];
542225d9b89SEric Dumazet if (!slot->qlen)
543225d9b89SEric Dumazet continue;
544225d9b89SEric Dumazet while (slot->qlen) {
545225d9b89SEric Dumazet skb = slot_dequeue_head(slot);
546225d9b89SEric Dumazet sfq_dec(q, i);
547225d9b89SEric Dumazet __skb_queue_tail(&list, skb);
548225d9b89SEric Dumazet }
549ddecf0f4SEric Dumazet slot->backlog = 0;
550ddecf0f4SEric Dumazet red_set_vars(&slot->vars);
551225d9b89SEric Dumazet q->ht[slot->hash] = SFQ_EMPTY_SLOT;
552225d9b89SEric Dumazet }
553225d9b89SEric Dumazet q->tail = NULL;
554225d9b89SEric Dumazet
555225d9b89SEric Dumazet while ((skb = __skb_dequeue(&list)) != NULL) {
556225d9b89SEric Dumazet unsigned int hash = sfq_hash(q, skb);
557225d9b89SEric Dumazet sfq_index x = q->ht[hash];
558225d9b89SEric Dumazet
559225d9b89SEric Dumazet slot = &q->slots[x];
560225d9b89SEric Dumazet if (x == SFQ_EMPTY_SLOT) {
561225d9b89SEric Dumazet x = q->dep[0].next; /* get a free slot */
56218cb8098SEric Dumazet if (x >= SFQ_MAX_FLOWS) {
56325331d6cSJohn Fastabend drop:
56425331d6cSJohn Fastabend qdisc_qstats_backlog_dec(sch, skb);
5652ccccf5fSWANG Cong drop_len += qdisc_pkt_len(skb);
56618cb8098SEric Dumazet kfree_skb(skb);
56718cb8098SEric Dumazet dropped++;
56818cb8098SEric Dumazet continue;
56918cb8098SEric Dumazet }
570225d9b89SEric Dumazet q->ht[hash] = x;
571225d9b89SEric Dumazet slot = &q->slots[x];
572225d9b89SEric Dumazet slot->hash = hash;
573225d9b89SEric Dumazet }
57418cb8098SEric Dumazet if (slot->qlen >= q->maxdepth)
57518cb8098SEric Dumazet goto drop;
576225d9b89SEric Dumazet slot_queue_add(slot, skb);
577ddecf0f4SEric Dumazet if (q->red_parms)
578ddecf0f4SEric Dumazet slot->vars.qavg = red_calc_qavg(q->red_parms,
579ddecf0f4SEric Dumazet &slot->vars,
580ddecf0f4SEric Dumazet slot->backlog);
581ddecf0f4SEric Dumazet slot->backlog += qdisc_pkt_len(skb);
582225d9b89SEric Dumazet sfq_inc(q, x);
583225d9b89SEric Dumazet if (slot->qlen == 1) { /* The flow is new */
584225d9b89SEric Dumazet if (q->tail == NULL) { /* It is the first flow */
585225d9b89SEric Dumazet slot->next = x;
586225d9b89SEric Dumazet } else {
587225d9b89SEric Dumazet slot->next = q->tail->next;
588225d9b89SEric Dumazet q->tail->next = x;
589225d9b89SEric Dumazet }
590225d9b89SEric Dumazet q->tail = slot;
59158ae7465SEric Dumazet slot->allot = q->quantum;
592225d9b89SEric Dumazet }
593225d9b89SEric Dumazet }
59418cb8098SEric Dumazet sch->q.qlen -= dropped;
5952ccccf5fSWANG Cong qdisc_tree_reduce_backlog(sch, dropped, drop_len);
596225d9b89SEric Dumazet }
597225d9b89SEric Dumazet
sfq_perturbation(struct timer_list * t)598cdeabbb8SKees Cook static void sfq_perturbation(struct timer_list *t)
5991da177e4SLinus Torvalds {
600cdeabbb8SKees Cook struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
601cdeabbb8SKees Cook struct Qdisc *sch = q->sch;
602d636fc5dSEric Dumazet spinlock_t *root_lock;
60355667441SEric Dumazet siphash_key_t nkey;
604ab18d76fSEric Dumazet int period;
6051da177e4SLinus Torvalds
60655667441SEric Dumazet get_random_bytes(&nkey, sizeof(nkey));
607d636fc5dSEric Dumazet rcu_read_lock();
608d636fc5dSEric Dumazet root_lock = qdisc_lock(qdisc_root_sleeping(sch));
609225d9b89SEric Dumazet spin_lock(root_lock);
61055667441SEric Dumazet q->perturbation = nkey;
611225d9b89SEric Dumazet if (!q->filter_list && q->tail)
61218cb8098SEric Dumazet sfq_rehash(sch);
613225d9b89SEric Dumazet spin_unlock(root_lock);
6141da177e4SLinus Torvalds
615ab18d76fSEric Dumazet /* q->perturb_period can change under us from
616ab18d76fSEric Dumazet * sfq_change() and sfq_destroy().
617ab18d76fSEric Dumazet */
618ab18d76fSEric Dumazet period = READ_ONCE(q->perturb_period);
619ab18d76fSEric Dumazet if (period)
620ab18d76fSEric Dumazet mod_timer(&q->perturb_timer, jiffies + period);
621d636fc5dSEric Dumazet rcu_read_unlock();
6221da177e4SLinus Torvalds }
6231da177e4SLinus Torvalds
sfq_change(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)62458ae7465SEric Dumazet static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
62558ae7465SEric Dumazet struct netlink_ext_ack *extack)
6261da177e4SLinus Torvalds {
6271da177e4SLinus Torvalds struct sfq_sched_data *q = qdisc_priv(sch);
6281e90474cSPatrick McHardy struct tc_sfq_qopt *ctl = nla_data(opt);
62918cb8098SEric Dumazet struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
6302ccccf5fSWANG Cong unsigned int qlen, dropped = 0;
631ddecf0f4SEric Dumazet struct red_parms *p = NULL;
632f9ab7425SGao Feng struct sk_buff *to_free = NULL;
633f9ab7425SGao Feng struct sk_buff *tail = NULL;
6341da177e4SLinus Torvalds
6351e90474cSPatrick McHardy if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
6361da177e4SLinus Torvalds return -EINVAL;
63718cb8098SEric Dumazet if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
63818cb8098SEric Dumazet ctl_v1 = nla_data(opt);
639119b3d38Sstephen hemminger if (ctl->divisor &&
640119b3d38Sstephen hemminger (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
641119b3d38Sstephen hemminger return -EINVAL;
642df4953e4SEric Dumazet
64358ae7465SEric Dumazet if ((int)ctl->quantum < 0) {
64458ae7465SEric Dumazet NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
645df4953e4SEric Dumazet return -EINVAL;
646df4953e4SEric Dumazet }
6478afa10cbSNogah Frankel if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
648e323d865SEric Dumazet ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
6498afa10cbSNogah Frankel return -EINVAL;
650ddecf0f4SEric Dumazet if (ctl_v1 && ctl_v1->qth_min) {
651ddecf0f4SEric Dumazet p = kmalloc(sizeof(*p), GFP_KERNEL);
652ddecf0f4SEric Dumazet if (!p)
653ddecf0f4SEric Dumazet return -ENOMEM;
654ddecf0f4SEric Dumazet }
655*833e9a1cSOctavian Purdila if (ctl->limit == 1) {
656*833e9a1cSOctavian Purdila NL_SET_ERR_MSG_MOD(extack, "invalid limit");
657*833e9a1cSOctavian Purdila return -EINVAL;
658*833e9a1cSOctavian Purdila }
6591da177e4SLinus Torvalds sch_tree_lock(sch);
66058ae7465SEric Dumazet if (ctl->quantum)
66118cb8098SEric Dumazet q->quantum = ctl->quantum;
662ab18d76fSEric Dumazet WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
66318cb8098SEric Dumazet if (ctl->flows)
66418cb8098SEric Dumazet q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
66518cb8098SEric Dumazet if (ctl->divisor) {
666817fb15dSEric Dumazet q->divisor = ctl->divisor;
66718cb8098SEric Dumazet q->maxflows = min_t(u32, q->maxflows, q->divisor);
66818cb8098SEric Dumazet }
66918cb8098SEric Dumazet if (ctl_v1) {
67018cb8098SEric Dumazet if (ctl_v1->depth)
67118cb8098SEric Dumazet q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
672ddecf0f4SEric Dumazet if (p) {
673ddecf0f4SEric Dumazet swap(q->red_parms, p);
674ddecf0f4SEric Dumazet red_set_parms(q->red_parms,
675ddecf0f4SEric Dumazet ctl_v1->qth_min, ctl_v1->qth_max,
676ddecf0f4SEric Dumazet ctl_v1->Wlog,
677ddecf0f4SEric Dumazet ctl_v1->Plog, ctl_v1->Scell_log,
678ddecf0f4SEric Dumazet NULL,
679ddecf0f4SEric Dumazet ctl_v1->max_P);
680ddecf0f4SEric Dumazet }
681ddecf0f4SEric Dumazet q->flags = ctl_v1->flags;
68218cb8098SEric Dumazet q->headdrop = ctl_v1->headdrop;
68318cb8098SEric Dumazet }
68418cb8098SEric Dumazet if (ctl->limit) {
68518cb8098SEric Dumazet q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
68618cb8098SEric Dumazet q->maxflows = min_t(u32, q->maxflows, q->limit);
68718cb8098SEric Dumazet }
68818cb8098SEric Dumazet
6895e50da01SPatrick McHardy qlen = sch->q.qlen;
690f9ab7425SGao Feng while (sch->q.qlen > q->limit) {
691f9ab7425SGao Feng dropped += sfq_drop(sch, &to_free);
692f9ab7425SGao Feng if (!tail)
693f9ab7425SGao Feng tail = to_free;
694f9ab7425SGao Feng }
695f9ab7425SGao Feng
696f9ab7425SGao Feng rtnl_kfree_skbs(to_free, tail);
6972ccccf5fSWANG Cong qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
6981da177e4SLinus Torvalds
6991da177e4SLinus Torvalds del_timer(&q->perturb_timer);
7001da177e4SLinus Torvalds if (q->perturb_period) {
70132740ddcSAlexey Kuznetsov mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
70255667441SEric Dumazet get_random_bytes(&q->perturbation, sizeof(q->perturbation));
7031da177e4SLinus Torvalds }
7041da177e4SLinus Torvalds sch_tree_unlock(sch);
705ddecf0f4SEric Dumazet kfree(p);
7061da177e4SLinus Torvalds return 0;
7071da177e4SLinus Torvalds }
7081da177e4SLinus Torvalds
sfq_alloc(size_t sz)709bd16a6ccSEric Dumazet static void *sfq_alloc(size_t sz)
710bd16a6ccSEric Dumazet {
711752ade68SMichal Hocko return kvmalloc(sz, GFP_KERNEL);
712bd16a6ccSEric Dumazet }
713bd16a6ccSEric Dumazet
sfq_free(void * addr)714bd16a6ccSEric Dumazet static void sfq_free(void *addr)
715bd16a6ccSEric Dumazet {
7164cb28970SWANG Cong kvfree(addr);
717bd16a6ccSEric Dumazet }
718bd16a6ccSEric Dumazet
sfq_destroy(struct Qdisc * sch)719bd16a6ccSEric Dumazet static void sfq_destroy(struct Qdisc *sch)
720bd16a6ccSEric Dumazet {
721bd16a6ccSEric Dumazet struct sfq_sched_data *q = qdisc_priv(sch);
722bd16a6ccSEric Dumazet
7236529eabaSJiri Pirko tcf_block_put(q->block);
724ab18d76fSEric Dumazet WRITE_ONCE(q->perturb_period, 0);
725bd16a6ccSEric Dumazet del_timer_sync(&q->perturb_timer);
726bd16a6ccSEric Dumazet sfq_free(q->ht);
72718cb8098SEric Dumazet sfq_free(q->slots);
728ddecf0f4SEric Dumazet kfree(q->red_parms);
729bd16a6ccSEric Dumazet }
730bd16a6ccSEric Dumazet
sfq_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)731e63d7dfdSAlexander Aring static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
732e63d7dfdSAlexander Aring struct netlink_ext_ack *extack)
7331da177e4SLinus Torvalds {
7341da177e4SLinus Torvalds struct sfq_sched_data *q = qdisc_priv(sch);
7351da177e4SLinus Torvalds int i;
7366529eabaSJiri Pirko int err;
7376529eabaSJiri Pirko
738f85729d0SPaolo Abeni q->sch = sch;
739cdeabbb8SKees Cook timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
740e2326576SNikolay Aleksandrov
7418d1a77f9SAlexander Aring err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
7426529eabaSJiri Pirko if (err)
7436529eabaSJiri Pirko return err;
7441da177e4SLinus Torvalds
74518cb8098SEric Dumazet for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
74618cb8098SEric Dumazet q->dep[i].next = i + SFQ_MAX_FLOWS;
74718cb8098SEric Dumazet q->dep[i].prev = i + SFQ_MAX_FLOWS;
7481da177e4SLinus Torvalds }
7496f9e98f7SStephen Hemminger
75018cb8098SEric Dumazet q->limit = SFQ_MAX_DEPTH;
75118cb8098SEric Dumazet q->maxdepth = SFQ_MAX_DEPTH;
752eda83e3bSEric Dumazet q->cur_depth = 0;
753eda83e3bSEric Dumazet q->tail = NULL;
754817fb15dSEric Dumazet q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
75518cb8098SEric Dumazet q->maxflows = SFQ_DEFAULT_FLOWS;
7565ce2d488SDavid S. Miller q->quantum = psched_mtu(qdisc_dev(sch));
7571da177e4SLinus Torvalds q->perturb_period = 0;
75855667441SEric Dumazet get_random_bytes(&q->perturbation, sizeof(q->perturbation));
75902a9098eSEric Dumazet
76002a9098eSEric Dumazet if (opt) {
76158ae7465SEric Dumazet int err = sfq_change(sch, opt, extack);
7621da177e4SLinus Torvalds if (err)
7631da177e4SLinus Torvalds return err;
7641da177e4SLinus Torvalds }
7656f9e98f7SStephen Hemminger
766bd16a6ccSEric Dumazet q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
76718cb8098SEric Dumazet q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
76818cb8098SEric Dumazet if (!q->ht || !q->slots) {
76987b60cfaSEric Dumazet /* Note: sfq_destroy() will be called by our caller */
770817fb15dSEric Dumazet return -ENOMEM;
771bd16a6ccSEric Dumazet }
77287b60cfaSEric Dumazet
773817fb15dSEric Dumazet for (i = 0; i < q->divisor; i++)
774817fb15dSEric Dumazet q->ht[i] = SFQ_EMPTY_SLOT;
775817fb15dSEric Dumazet
77618cb8098SEric Dumazet for (i = 0; i < q->maxflows; i++) {
77718c8d82aSEric Dumazet slot_queue_init(&q->slots[i]);
7781da177e4SLinus Torvalds sfq_link(q, i);
77918c8d82aSEric Dumazet }
78023624935SEric Dumazet if (q->limit >= 1)
78123624935SEric Dumazet sch->flags |= TCQ_F_CAN_BYPASS;
78223624935SEric Dumazet else
78323624935SEric Dumazet sch->flags &= ~TCQ_F_CAN_BYPASS;
7841da177e4SLinus Torvalds return 0;
7851da177e4SLinus Torvalds }
7861da177e4SLinus Torvalds
sfq_dump(struct Qdisc * sch,struct sk_buff * skb)7871da177e4SLinus Torvalds static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
7881da177e4SLinus Torvalds {
7891da177e4SLinus Torvalds struct sfq_sched_data *q = qdisc_priv(sch);
79027a884dcSArnaldo Carvalho de Melo unsigned char *b = skb_tail_pointer(skb);
79118cb8098SEric Dumazet struct tc_sfq_qopt_v1 opt;
792ddecf0f4SEric Dumazet struct red_parms *p = q->red_parms;
7931da177e4SLinus Torvalds
79418cb8098SEric Dumazet memset(&opt, 0, sizeof(opt));
79518cb8098SEric Dumazet opt.v0.quantum = q->quantum;
79618cb8098SEric Dumazet opt.v0.perturb_period = q->perturb_period / HZ;
79718cb8098SEric Dumazet opt.v0.limit = q->limit;
79818cb8098SEric Dumazet opt.v0.divisor = q->divisor;
79918cb8098SEric Dumazet opt.v0.flows = q->maxflows;
80018cb8098SEric Dumazet opt.depth = q->maxdepth;
80118cb8098SEric Dumazet opt.headdrop = q->headdrop;
8021da177e4SLinus Torvalds
803ddecf0f4SEric Dumazet if (p) {
804ddecf0f4SEric Dumazet opt.qth_min = p->qth_min >> p->Wlog;
805ddecf0f4SEric Dumazet opt.qth_max = p->qth_max >> p->Wlog;
806ddecf0f4SEric Dumazet opt.Wlog = p->Wlog;
807ddecf0f4SEric Dumazet opt.Plog = p->Plog;
808ddecf0f4SEric Dumazet opt.Scell_log = p->Scell_log;
809ddecf0f4SEric Dumazet opt.max_P = p->max_P;
810ddecf0f4SEric Dumazet }
811ddecf0f4SEric Dumazet memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
812ddecf0f4SEric Dumazet opt.flags = q->flags;
813ddecf0f4SEric Dumazet
8141b34ec43SDavid S. Miller if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
8151b34ec43SDavid S. Miller goto nla_put_failure;
8161da177e4SLinus Torvalds
8171da177e4SLinus Torvalds return skb->len;
8181da177e4SLinus Torvalds
8191e90474cSPatrick McHardy nla_put_failure:
820dc5fc579SArnaldo Carvalho de Melo nlmsg_trim(skb, b);
8211da177e4SLinus Torvalds return -1;
8221da177e4SLinus Torvalds }
8231da177e4SLinus Torvalds
sfq_leaf(struct Qdisc * sch,unsigned long arg)82441065fbaSJarek Poplawski static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
82541065fbaSJarek Poplawski {
82641065fbaSJarek Poplawski return NULL;
82741065fbaSJarek Poplawski }
82841065fbaSJarek Poplawski
sfq_find(struct Qdisc * sch,u32 classid)829143976ceSWANG Cong static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
8307d2681a6SPatrick McHardy {
8317d2681a6SPatrick McHardy return 0;
8327d2681a6SPatrick McHardy }
8337d2681a6SPatrick McHardy
sfq_bind(struct Qdisc * sch,unsigned long parent,u32 classid)834eb4a5527SJarek Poplawski static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
835eb4a5527SJarek Poplawski u32 classid)
836eb4a5527SJarek Poplawski {
837eb4a5527SJarek Poplawski return 0;
838eb4a5527SJarek Poplawski }
839eb4a5527SJarek Poplawski
sfq_unbind(struct Qdisc * q,unsigned long cl)840143976ceSWANG Cong static void sfq_unbind(struct Qdisc *q, unsigned long cl)
841da7115d9SJarek Poplawski {
842da7115d9SJarek Poplawski }
843da7115d9SJarek Poplawski
sfq_tcf_block(struct Qdisc * sch,unsigned long cl,struct netlink_ext_ack * extack)844cbaacc4eSAlexander Aring static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
845cbaacc4eSAlexander Aring struct netlink_ext_ack *extack)
8467d2681a6SPatrick McHardy {
8477d2681a6SPatrick McHardy struct sfq_sched_data *q = qdisc_priv(sch);
8487d2681a6SPatrick McHardy
8497d2681a6SPatrick McHardy if (cl)
8507d2681a6SPatrick McHardy return NULL;
8516529eabaSJiri Pirko return q->block;
8527d2681a6SPatrick McHardy }
8537d2681a6SPatrick McHardy
sfq_dump_class(struct Qdisc * sch,unsigned long cl,struct sk_buff * skb,struct tcmsg * tcm)85494de78d1SPatrick McHardy static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
85594de78d1SPatrick McHardy struct sk_buff *skb, struct tcmsg *tcm)
85694de78d1SPatrick McHardy {
85794de78d1SPatrick McHardy tcm->tcm_handle |= TC_H_MIN(cl);
85894de78d1SPatrick McHardy return 0;
85994de78d1SPatrick McHardy }
86094de78d1SPatrick McHardy
sfq_dump_class_stats(struct Qdisc * sch,unsigned long cl,struct gnet_dump * d)86194de78d1SPatrick McHardy static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
86294de78d1SPatrick McHardy struct gnet_dump *d)
86394de78d1SPatrick McHardy {
86494de78d1SPatrick McHardy struct sfq_sched_data *q = qdisc_priv(sch);
865ee09b3c1SEric Dumazet sfq_index idx = q->ht[cl - 1];
866ee09b3c1SEric Dumazet struct gnet_stats_queue qs = { 0 };
867ee09b3c1SEric Dumazet struct tc_sfq_xstats xstats = { 0 };
868c4266263SEric Dumazet
869ee09b3c1SEric Dumazet if (idx != SFQ_EMPTY_SLOT) {
870ee09b3c1SEric Dumazet const struct sfq_slot *slot = &q->slots[idx];
871ee09b3c1SEric Dumazet
87258ae7465SEric Dumazet xstats.allot = slot->allot;
873ee09b3c1SEric Dumazet qs.qlen = slot->qlen;
874ddecf0f4SEric Dumazet qs.backlog = slot->backlog;
875ee09b3c1SEric Dumazet }
876b0ab6f92SJohn Fastabend if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
87794de78d1SPatrick McHardy return -1;
87894de78d1SPatrick McHardy return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
87994de78d1SPatrick McHardy }
88094de78d1SPatrick McHardy
sfq_walk(struct Qdisc * sch,struct qdisc_walker * arg)8817d2681a6SPatrick McHardy static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
8827d2681a6SPatrick McHardy {
88394de78d1SPatrick McHardy struct sfq_sched_data *q = qdisc_priv(sch);
88494de78d1SPatrick McHardy unsigned int i;
88594de78d1SPatrick McHardy
88694de78d1SPatrick McHardy if (arg->stop)
8877d2681a6SPatrick McHardy return;
88894de78d1SPatrick McHardy
889817fb15dSEric Dumazet for (i = 0; i < q->divisor; i++) {
890e046fa89SZhengchao Shao if (q->ht[i] == SFQ_EMPTY_SLOT) {
89194de78d1SPatrick McHardy arg->count++;
89294de78d1SPatrick McHardy continue;
89394de78d1SPatrick McHardy }
894e046fa89SZhengchao Shao if (!tc_qdisc_stats_dump(sch, i + 1, arg))
89594de78d1SPatrick McHardy break;
89694de78d1SPatrick McHardy }
8977d2681a6SPatrick McHardy }
8987d2681a6SPatrick McHardy
8997d2681a6SPatrick McHardy static const struct Qdisc_class_ops sfq_class_ops = {
90041065fbaSJarek Poplawski .leaf = sfq_leaf,
901143976ceSWANG Cong .find = sfq_find,
9026529eabaSJiri Pirko .tcf_block = sfq_tcf_block,
903eb4a5527SJarek Poplawski .bind_tcf = sfq_bind,
904143976ceSWANG Cong .unbind_tcf = sfq_unbind,
90594de78d1SPatrick McHardy .dump = sfq_dump_class,
90694de78d1SPatrick McHardy .dump_stats = sfq_dump_class_stats,
9077d2681a6SPatrick McHardy .walk = sfq_walk,
9087d2681a6SPatrick McHardy };
9097d2681a6SPatrick McHardy
91020fea08bSEric Dumazet static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
9117d2681a6SPatrick McHardy .cl_ops = &sfq_class_ops,
9121da177e4SLinus Torvalds .id = "sfq",
9131da177e4SLinus Torvalds .priv_size = sizeof(struct sfq_sched_data),
9141da177e4SLinus Torvalds .enqueue = sfq_enqueue,
9151da177e4SLinus Torvalds .dequeue = sfq_dequeue,
91607bd8df5SEric Dumazet .peek = qdisc_peek_dequeued,
9171da177e4SLinus Torvalds .init = sfq_init,
9181da177e4SLinus Torvalds .reset = sfq_reset,
9191da177e4SLinus Torvalds .destroy = sfq_destroy,
9201da177e4SLinus Torvalds .change = NULL,
9211da177e4SLinus Torvalds .dump = sfq_dump,
9221da177e4SLinus Torvalds .owner = THIS_MODULE,
9231da177e4SLinus Torvalds };
9241da177e4SLinus Torvalds
sfq_module_init(void)9251da177e4SLinus Torvalds static int __init sfq_module_init(void)
9261da177e4SLinus Torvalds {
9271da177e4SLinus Torvalds return register_qdisc(&sfq_qdisc_ops);
9281da177e4SLinus Torvalds }
sfq_module_exit(void)9291da177e4SLinus Torvalds static void __exit sfq_module_exit(void)
9301da177e4SLinus Torvalds {
9311da177e4SLinus Torvalds unregister_qdisc(&sfq_qdisc_ops);
9321da177e4SLinus Torvalds }
9331da177e4SLinus Torvalds module_init(sfq_module_init)
9341da177e4SLinus Torvalds module_exit(sfq_module_exit)
9351da177e4SLinus Torvalds MODULE_LICENSE("GPL");
936