xref: /openbmc/linux/net/sched/sch_gred.c (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1 /*
2  * net/sched/sch_gred.c	Generic Random Early Detection queue.
3  *
4  *
5  *              This program is free software; you can redistribute it and/or
6  *              modify it under the terms of the GNU General Public License
7  *              as published by the Free Software Foundation; either version
8  *              2 of the License, or (at your option) any later version.
9  *
10  * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
11  *
12  *             991129: -  Bug fix with grio mode
13  *		       - a better sing. AvgQ mode with Grio(WRED)
14  *		       - A finer grained VQ dequeue based on sugestion
15  *		         from Ren Liu
16  *		       - More error checks
17  *
18  *
19  *
20  *  For all the glorious comments look at Alexey's sch_red.c
21  */
22 
23 #include <linux/config.h>
24 #include <linux/module.h>
25 #include <asm/uaccess.h>
26 #include <asm/system.h>
27 #include <linux/bitops.h>
28 #include <linux/types.h>
29 #include <linux/kernel.h>
30 #include <linux/sched.h>
31 #include <linux/string.h>
32 #include <linux/mm.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/in.h>
36 #include <linux/errno.h>
37 #include <linux/interrupt.h>
38 #include <linux/if_ether.h>
39 #include <linux/inet.h>
40 #include <linux/netdevice.h>
41 #include <linux/etherdevice.h>
42 #include <linux/notifier.h>
43 #include <net/ip.h>
44 #include <net/route.h>
45 #include <linux/skbuff.h>
46 #include <net/sock.h>
47 #include <net/pkt_sched.h>
48 
49 #if 1 /* control */
50 #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
51 #else
52 #define DPRINTK(format,args...)
53 #endif
54 
55 #if 0 /* data */
56 #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
57 #else
58 #define D2PRINTK(format,args...)
59 #endif
60 
61 struct gred_sched_data;
62 struct gred_sched;
63 
64 struct gred_sched_data
65 {
66 /* Parameters */
67 	u32		limit;		/* HARD maximal queue length	*/
68 	u32		qth_min;	/* Min average length threshold: A scaled */
69 	u32		qth_max;	/* Max average length threshold: A scaled */
70 	u32      	DP;		/* the drop pramaters */
71 	char		Wlog;		/* log(W)		*/
72 	char		Plog;		/* random number bits	*/
73 	u32		Scell_max;
74 	u32		Rmask;
75 	u32		bytesin;	/* bytes seen on virtualQ so far*/
76 	u32		packetsin;	/* packets seen on virtualQ so far*/
77 	u32		backlog;	/* bytes on the virtualQ */
78 	u32		forced;	/* packets dropped for exceeding limits */
79 	u32		early;	/* packets dropped as a warning */
80 	u32		other;	/* packets dropped by invoking drop() */
81 	u32		pdrop;	/* packets dropped because we exceeded physical queue limits */
82 	char		Scell_log;
83 	u8		Stab[256];
84 	u8              prio;        /* the prio of this vq */
85 
86 /* Variables */
87 	unsigned long	qave;		/* Average queue length: A scaled */
88 	int		qcount;		/* Packets since last random number generation */
89 	u32		qR;		/* Cached random number */
90 
91 	psched_time_t	qidlestart;	/* Start of idle period	*/
92 };
93 
94 struct gred_sched
95 {
96 	struct gred_sched_data *tab[MAX_DPs];
97 	u32 		DPs;
98 	u32 		def;
99 	u8 		initd;
100 	u8 		grio;
101 	u8 		eqp;
102 };
103 
104 static int
105 gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
106 {
107 	psched_time_t now;
108 	struct gred_sched_data *q=NULL;
109 	struct gred_sched *t= qdisc_priv(sch);
110 	unsigned long	qave=0;
111 	int i=0;
112 
113 	if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
114 		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
115 		goto do_enqueue;
116 	}
117 
118 
119 	if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
120 		printk("GRED: setting to default (%d)\n ",t->def);
121 		if (!(q=t->tab[t->def])) {
122 			DPRINTK("GRED: setting to default FAILED! dropping!! "
123 			    "(%d)\n ", t->def);
124 			goto drop;
125 		}
126 		/* fix tc_index? --could be controvesial but needed for
127 		   requeueing */
128 		skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
129 	}
130 
131 	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
132 	    "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
133 	    sch->qstats.backlog);
134 	/* sum up all the qaves of prios <= to ours to get the new qave*/
135 	if (!t->eqp && t->grio) {
136 		for (i=0;i<t->DPs;i++) {
137 			if ((!t->tab[i]) || (i==q->DP))
138 				continue;
139 
140 			if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
141 				qave +=t->tab[i]->qave;
142 		}
143 
144 	}
145 
146 	q->packetsin++;
147 	q->bytesin+=skb->len;
148 
149 	if (t->eqp && t->grio) {
150 		qave=0;
151 		q->qave=t->tab[t->def]->qave;
152 		q->qidlestart=t->tab[t->def]->qidlestart;
153 	}
154 
155 	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
156 		long us_idle;
157 		PSCHED_GET_TIME(now);
158 		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
159 		PSCHED_SET_PASTPERFECT(q->qidlestart);
160 
161 		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
162 	} else {
163 		if (t->eqp) {
164 			q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
165 		} else {
166 			q->qave += q->backlog - (q->qave >> q->Wlog);
167 		}
168 
169 	}
170 
171 
172 	if (t->eqp && t->grio)
173 		t->tab[t->def]->qave=q->qave;
174 
175 	if ((q->qave+qave) < q->qth_min) {
176 		q->qcount = -1;
177 enqueue:
178 		if (q->backlog + skb->len <= q->limit) {
179 			q->backlog += skb->len;
180 do_enqueue:
181 			__skb_queue_tail(&sch->q, skb);
182 			sch->qstats.backlog += skb->len;
183 			sch->bstats.bytes += skb->len;
184 			sch->bstats.packets++;
185 			return 0;
186 		} else {
187 			q->pdrop++;
188 		}
189 
190 drop:
191 		kfree_skb(skb);
192 		sch->qstats.drops++;
193 		return NET_XMIT_DROP;
194 	}
195 	if ((q->qave+qave) >= q->qth_max) {
196 		q->qcount = -1;
197 		sch->qstats.overlimits++;
198 		q->forced++;
199 		goto drop;
200 	}
201 	if (++q->qcount) {
202 		if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
203 			goto enqueue;
204 		q->qcount = 0;
205 		q->qR = net_random()&q->Rmask;
206 		sch->qstats.overlimits++;
207 		q->early++;
208 		goto drop;
209 	}
210 	q->qR = net_random()&q->Rmask;
211 	goto enqueue;
212 }
213 
214 static int
215 gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
216 {
217 	struct gred_sched_data *q;
218 	struct gred_sched *t= qdisc_priv(sch);
219 	q= t->tab[(skb->tc_index&0xf)];
220 /* error checking here -- probably unnecessary */
221 	PSCHED_SET_PASTPERFECT(q->qidlestart);
222 
223 	__skb_queue_head(&sch->q, skb);
224 	sch->qstats.backlog += skb->len;
225 	sch->qstats.requeues++;
226 	q->backlog += skb->len;
227 	return 0;
228 }
229 
230 static struct sk_buff *
231 gred_dequeue(struct Qdisc* sch)
232 {
233 	struct sk_buff *skb;
234 	struct gred_sched_data *q;
235 	struct gred_sched *t= qdisc_priv(sch);
236 
237 	skb = __skb_dequeue(&sch->q);
238 	if (skb) {
239 		sch->qstats.backlog -= skb->len;
240 		q= t->tab[(skb->tc_index&0xf)];
241 		if (q) {
242 			q->backlog -= skb->len;
243 			if (!q->backlog && !t->eqp)
244 				PSCHED_GET_TIME(q->qidlestart);
245 		} else {
246 			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
247 		}
248 		return skb;
249 	}
250 
251 	if (t->eqp) {
252 			q= t->tab[t->def];
253 			if (!q)
254 				D2PRINTK("no default VQ set: Results will be "
255 				       "screwed up\n");
256 			else
257 				PSCHED_GET_TIME(q->qidlestart);
258 	}
259 
260 	return NULL;
261 }
262 
263 static unsigned int gred_drop(struct Qdisc* sch)
264 {
265 	struct sk_buff *skb;
266 
267 	struct gred_sched_data *q;
268 	struct gred_sched *t= qdisc_priv(sch);
269 
270 	skb = __skb_dequeue_tail(&sch->q);
271 	if (skb) {
272 		unsigned int len = skb->len;
273 		sch->qstats.backlog -= len;
274 		sch->qstats.drops++;
275 		q= t->tab[(skb->tc_index&0xf)];
276 		if (q) {
277 			q->backlog -= len;
278 			q->other++;
279 			if (!q->backlog && !t->eqp)
280 				PSCHED_GET_TIME(q->qidlestart);
281 		} else {
282 			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
283 		}
284 
285 		kfree_skb(skb);
286 		return len;
287 	}
288 
289 	q=t->tab[t->def];
290 	if (!q) {
291 		D2PRINTK("no default VQ set: Results might be screwed up\n");
292 		return 0;
293 	}
294 
295 	PSCHED_GET_TIME(q->qidlestart);
296 	return 0;
297 
298 }
299 
300 static void gred_reset(struct Qdisc* sch)
301 {
302 	int i;
303 	struct gred_sched_data *q;
304 	struct gred_sched *t= qdisc_priv(sch);
305 
306 	__skb_queue_purge(&sch->q);
307 
308 	sch->qstats.backlog = 0;
309 
310         for (i=0;i<t->DPs;i++) {
311 	        q= t->tab[i];
312 		if (!q)
313 			continue;
314 		PSCHED_SET_PASTPERFECT(q->qidlestart);
315 		q->qave = 0;
316 		q->qcount = -1;
317 		q->backlog = 0;
318 		q->other=0;
319 		q->forced=0;
320 		q->pdrop=0;
321 		q->early=0;
322 	}
323 }
324 
325 static int gred_change(struct Qdisc *sch, struct rtattr *opt)
326 {
327 	struct gred_sched *table = qdisc_priv(sch);
328 	struct gred_sched_data *q;
329 	struct tc_gred_qopt *ctl;
330 	struct tc_gred_sopt *sopt;
331 	struct rtattr *tb[TCA_GRED_STAB];
332 	struct rtattr *tb2[TCA_GRED_DPS];
333 	int i;
334 
335 	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
336 		return -EINVAL;
337 
338 	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
339 		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
340 
341 	    if (tb2[TCA_GRED_DPS-1] == 0)
342 			return -EINVAL;
343 
344 		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
345 		table->DPs=sopt->DPs;
346 		table->def=sopt->def_DP;
347 		table->grio=sopt->grio;
348 		table->initd=0;
349 		/* probably need to clear all the table DP entries as well */
350 		return 0;
351 	    }
352 
353 
354 	if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
355 		RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
356 		RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
357 			return -EINVAL;
358 
359 	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
360 	if (ctl->DP > MAX_DPs-1 ) {
361 		/* misbehaving is punished! Put in the default drop probability */
362 		DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
363 			"set to default at %d\n",ctl->DP,table->def);
364 		ctl->DP=table->def;
365 	}
366 
367 	if (table->tab[ctl->DP] == NULL) {
368 		table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
369 					    GFP_KERNEL);
370 		if (NULL == table->tab[ctl->DP])
371 			return -ENOMEM;
372 		memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
373 	}
374 	q= table->tab[ctl->DP];
375 
376 	if (table->grio) {
377 		if (ctl->prio <=0) {
378 			if (table->def && table->tab[table->def]) {
379 				DPRINTK("\nGRED: DP %u does not have a prio"
380 					"setting default to %d\n",ctl->DP,
381 					table->tab[table->def]->prio);
382 				q->prio=table->tab[table->def]->prio;
383 			} else {
384 				DPRINTK("\nGRED: DP %u does not have a prio"
385 					" setting default to 8\n",ctl->DP);
386 				q->prio=8;
387 			}
388 		} else {
389 			q->prio=ctl->prio;
390 		}
391 	} else {
392 		q->prio=8;
393 	}
394 
395 
396 	q->DP=ctl->DP;
397 	q->Wlog = ctl->Wlog;
398 	q->Plog = ctl->Plog;
399 	q->limit = ctl->limit;
400 	q->Scell_log = ctl->Scell_log;
401 	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
402 	q->Scell_max = (255<<q->Scell_log);
403 	q->qth_min = ctl->qth_min<<ctl->Wlog;
404 	q->qth_max = ctl->qth_max<<ctl->Wlog;
405 	q->qave=0;
406 	q->backlog=0;
407 	q->qcount = -1;
408 	q->other=0;
409 	q->forced=0;
410 	q->pdrop=0;
411 	q->early=0;
412 
413 	PSCHED_SET_PASTPERFECT(q->qidlestart);
414 	memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
415 
416 	if ( table->initd && table->grio) {
417 	/* this looks ugly but it's not in the fast path */
418 		for (i=0;i<table->DPs;i++) {
419 			if ((!table->tab[i]) || (i==q->DP) )
420 				continue;
421 			if (table->tab[i]->prio == q->prio ){
422 				/* WRED mode detected */
423 				table->eqp=1;
424 				break;
425 			}
426 		}
427 	}
428 
429 	if (!table->initd) {
430 		table->initd=1;
431 		/*
432         	the first entry also goes into the default until
433         	over-written
434 		*/
435 
436 		if (table->tab[table->def] == NULL) {
437 			table->tab[table->def]=
438 				kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
439 			if (NULL == table->tab[table->def])
440 				return -ENOMEM;
441 
442 			memset(table->tab[table->def], 0,
443 			       (sizeof(struct gred_sched_data)));
444 		}
445 		q= table->tab[table->def];
446 		q->DP=table->def;
447 		q->Wlog = ctl->Wlog;
448 		q->Plog = ctl->Plog;
449 		q->limit = ctl->limit;
450 		q->Scell_log = ctl->Scell_log;
451 		q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
452 		q->Scell_max = (255<<q->Scell_log);
453 		q->qth_min = ctl->qth_min<<ctl->Wlog;
454 		q->qth_max = ctl->qth_max<<ctl->Wlog;
455 
456 		if (table->grio)
457 			q->prio=table->tab[ctl->DP]->prio;
458 		else
459 			q->prio=8;
460 
461 		q->qcount = -1;
462 		PSCHED_SET_PASTPERFECT(q->qidlestart);
463 		memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
464 	}
465 	return 0;
466 
467 }
468 
469 static int gred_init(struct Qdisc *sch, struct rtattr *opt)
470 {
471 	struct gred_sched *table = qdisc_priv(sch);
472 	struct tc_gred_sopt *sopt;
473 	struct rtattr *tb[TCA_GRED_STAB];
474 	struct rtattr *tb2[TCA_GRED_DPS];
475 
476 	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
477 		return -EINVAL;
478 
479 	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
480 		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
481 
482 	    if (tb2[TCA_GRED_DPS-1] == 0)
483 			return -EINVAL;
484 
485 		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
486 		table->DPs=sopt->DPs;
487 		table->def=sopt->def_DP;
488 		table->grio=sopt->grio;
489 		table->initd=0;
490 		return 0;
491 	}
492 
493 	DPRINTK("\n GRED_INIT error!\n");
494 	return -EINVAL;
495 }
496 
497 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
498 {
499 	unsigned long qave;
500 	struct rtattr *rta;
501 	struct tc_gred_qopt *opt = NULL ;
502 	struct tc_gred_qopt *dst;
503 	struct gred_sched *table = qdisc_priv(sch);
504 	struct gred_sched_data *q;
505 	int i;
506 	unsigned char	 *b = skb->tail;
507 
508 	rta = (struct rtattr*)b;
509 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
510 
511 	opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
512 
513 	if (opt  == NULL) {
514 		DPRINTK("gred_dump:failed to malloc for %Zd\n",
515 		    sizeof(struct tc_gred_qopt)*MAX_DPs);
516 		goto rtattr_failure;
517 	}
518 
519 	memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
520 
521 	if (!table->initd) {
522 		DPRINTK("NO GRED Queues setup!\n");
523 	}
524 
525 	for (i=0;i<MAX_DPs;i++) {
526 		dst= &opt[i];
527 		q= table->tab[i];
528 
529 		if (!q) {
530 			/* hack -- fix at some point with proper message
531 			   This is how we indicate to tc that there is no VQ
532 			   at this DP */
533 
534 			dst->DP=MAX_DPs+i;
535 			continue;
536 		}
537 
538 		dst->limit=q->limit;
539 		dst->qth_min=q->qth_min>>q->Wlog;
540 		dst->qth_max=q->qth_max>>q->Wlog;
541 		dst->DP=q->DP;
542 		dst->backlog=q->backlog;
543 		if (q->qave) {
544 			if (table->eqp && table->grio) {
545 				q->qidlestart=table->tab[table->def]->qidlestart;
546 				q->qave=table->tab[table->def]->qave;
547 			}
548 			if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
549 				long idle;
550 				psched_time_t now;
551 				PSCHED_GET_TIME(now);
552 				idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
553 				qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
554 				dst->qave = qave >> q->Wlog;
555 
556 			} else {
557 				dst->qave = q->qave >> q->Wlog;
558 			}
559 		} else {
560 			dst->qave = 0;
561 		}
562 
563 
564 		dst->Wlog = q->Wlog;
565 		dst->Plog = q->Plog;
566 		dst->Scell_log = q->Scell_log;
567 		dst->other = q->other;
568 		dst->forced = q->forced;
569 		dst->early = q->early;
570 		dst->pdrop = q->pdrop;
571 		dst->prio = q->prio;
572 		dst->packets=q->packetsin;
573 		dst->bytesin=q->bytesin;
574 	}
575 
576 	RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
577 	rta->rta_len = skb->tail - b;
578 
579 	kfree(opt);
580 	return skb->len;
581 
582 rtattr_failure:
583 	if (opt)
584 		kfree(opt);
585 	DPRINTK("gred_dump: FAILURE!!!!\n");
586 
587 /* also free the opt struct here */
588 	skb_trim(skb, b - skb->data);
589 	return -1;
590 }
591 
592 static void gred_destroy(struct Qdisc *sch)
593 {
594 	struct gred_sched *table = qdisc_priv(sch);
595 	int i;
596 
597 	for (i = 0;i < table->DPs; i++) {
598 		if (table->tab[i])
599 			kfree(table->tab[i]);
600 	}
601 }
602 
603 static struct Qdisc_ops gred_qdisc_ops = {
604 	.next		=	NULL,
605 	.cl_ops		=	NULL,
606 	.id		=	"gred",
607 	.priv_size	=	sizeof(struct gred_sched),
608 	.enqueue	=	gred_enqueue,
609 	.dequeue	=	gred_dequeue,
610 	.requeue	=	gred_requeue,
611 	.drop		=	gred_drop,
612 	.init		=	gred_init,
613 	.reset		=	gred_reset,
614 	.destroy	=	gred_destroy,
615 	.change		=	gred_change,
616 	.dump		=	gred_dump,
617 	.owner		=	THIS_MODULE,
618 };
619 
620 static int __init gred_module_init(void)
621 {
622 	return register_qdisc(&gred_qdisc_ops);
623 }
624 static void __exit gred_module_exit(void)
625 {
626 	unregister_qdisc(&gred_qdisc_ops);
627 }
628 module_init(gred_module_init)
629 module_exit(gred_module_exit)
630 MODULE_LICENSE("GPL");
631