1 /* 2 * net/sched/sch_gred.c Generic Random Early Detection queue. 3 * 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License 7 * as published by the Free Software Foundation; either version 8 * 2 of the License, or (at your option) any later version. 9 * 10 * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 11 * 12 * 991129: - Bug fix with grio mode 13 * - a better sing. AvgQ mode with Grio(WRED) 14 * - A finer grained VQ dequeue based on sugestion 15 * from Ren Liu 16 * - More error checks 17 * 18 * For all the glorious comments look at include/net/red.h 19 */ 20 21 #include <linux/module.h> 22 #include <linux/types.h> 23 #include <linux/kernel.h> 24 #include <linux/skbuff.h> 25 #include <net/pkt_sched.h> 26 #include <net/red.h> 27 28 #define GRED_DEF_PRIO (MAX_DPs / 2) 29 #define GRED_VQ_MASK (MAX_DPs - 1) 30 31 struct gred_sched_data; 32 struct gred_sched; 33 34 struct gred_sched_data 35 { 36 u32 limit; /* HARD maximal queue length */ 37 u32 DP; /* the drop pramaters */ 38 u32 bytesin; /* bytes seen on virtualQ so far*/ 39 u32 packetsin; /* packets seen on virtualQ so far*/ 40 u32 backlog; /* bytes on the virtualQ */ 41 u8 prio; /* the prio of this vq */ 42 43 struct red_parms parms; 44 struct red_stats stats; 45 }; 46 47 enum { 48 GRED_WRED_MODE = 1, 49 GRED_RIO_MODE, 50 }; 51 52 struct gred_sched 53 { 54 struct gred_sched_data *tab[MAX_DPs]; 55 unsigned long flags; 56 u32 red_flags; 57 u32 DPs; 58 u32 def; 59 struct red_parms wred_set; 60 }; 61 62 static inline int gred_wred_mode(struct gred_sched *table) 63 { 64 return test_bit(GRED_WRED_MODE, &table->flags); 65 } 66 67 static inline void gred_enable_wred_mode(struct gred_sched *table) 68 { 69 __set_bit(GRED_WRED_MODE, &table->flags); 70 } 71 72 static inline void gred_disable_wred_mode(struct gred_sched *table) 73 { 74 __clear_bit(GRED_WRED_MODE, &table->flags); 75 } 76 77 static inline int gred_rio_mode(struct gred_sched *table) 78 { 79 return test_bit(GRED_RIO_MODE, &table->flags); 80 } 81 82 static inline void gred_enable_rio_mode(struct gred_sched *table) 83 { 84 __set_bit(GRED_RIO_MODE, &table->flags); 85 } 86 87 static inline void gred_disable_rio_mode(struct gred_sched *table) 88 { 89 __clear_bit(GRED_RIO_MODE, &table->flags); 90 } 91 92 static inline int gred_wred_mode_check(struct Qdisc *sch) 93 { 94 struct gred_sched *table = qdisc_priv(sch); 95 int i; 96 97 /* Really ugly O(n^2) but shouldn't be necessary too frequent. */ 98 for (i = 0; i < table->DPs; i++) { 99 struct gred_sched_data *q = table->tab[i]; 100 int n; 101 102 if (q == NULL) 103 continue; 104 105 for (n = 0; n < table->DPs; n++) 106 if (table->tab[n] && table->tab[n] != q && 107 table->tab[n]->prio == q->prio) 108 return 1; 109 } 110 111 return 0; 112 } 113 114 static inline unsigned int gred_backlog(struct gred_sched *table, 115 struct gred_sched_data *q, 116 struct Qdisc *sch) 117 { 118 if (gred_wred_mode(table)) 119 return sch->qstats.backlog; 120 else 121 return q->backlog; 122 } 123 124 static inline u16 tc_index_to_dp(struct sk_buff *skb) 125 { 126 return skb->tc_index & GRED_VQ_MASK; 127 } 128 129 static inline void gred_load_wred_set(struct gred_sched *table, 130 struct gred_sched_data *q) 131 { 132 q->parms.qavg = table->wred_set.qavg; 133 q->parms.qidlestart = table->wred_set.qidlestart; 134 } 135 136 static inline void gred_store_wred_set(struct gred_sched *table, 137 struct gred_sched_data *q) 138 { 139 table->wred_set.qavg = q->parms.qavg; 140 } 141 142 static inline int gred_use_ecn(struct gred_sched *t) 143 { 144 return t->red_flags & TC_RED_ECN; 145 } 146 147 static inline int gred_use_harddrop(struct gred_sched *t) 148 { 149 return t->red_flags & TC_RED_HARDDROP; 150 } 151 152 static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 153 { 154 struct gred_sched_data *q=NULL; 155 struct gred_sched *t= qdisc_priv(sch); 156 unsigned long qavg = 0; 157 u16 dp = tc_index_to_dp(skb); 158 159 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 160 dp = t->def; 161 162 if ((q = t->tab[dp]) == NULL) { 163 /* Pass through packets not assigned to a DP 164 * if no default DP has been configured. This 165 * allows for DP flows to be left untouched. 166 */ 167 if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) 168 return qdisc_enqueue_tail(skb, sch); 169 else 170 goto drop; 171 } 172 173 /* fix tc_index? --could be controvesial but needed for 174 requeueing */ 175 skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; 176 } 177 178 /* sum up all the qaves of prios <= to ours to get the new qave */ 179 if (!gred_wred_mode(t) && gred_rio_mode(t)) { 180 int i; 181 182 for (i = 0; i < t->DPs; i++) { 183 if (t->tab[i] && t->tab[i]->prio < q->prio && 184 !red_is_idling(&t->tab[i]->parms)) 185 qavg +=t->tab[i]->parms.qavg; 186 } 187 188 } 189 190 q->packetsin++; 191 q->bytesin += skb->len; 192 193 if (gred_wred_mode(t)) 194 gred_load_wred_set(t, q); 195 196 q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch)); 197 198 if (red_is_idling(&q->parms)) 199 red_end_of_idle_period(&q->parms); 200 201 if (gred_wred_mode(t)) 202 gred_store_wred_set(t, q); 203 204 switch (red_action(&q->parms, q->parms.qavg + qavg)) { 205 case RED_DONT_MARK: 206 break; 207 208 case RED_PROB_MARK: 209 sch->qstats.overlimits++; 210 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { 211 q->stats.prob_drop++; 212 goto congestion_drop; 213 } 214 215 q->stats.prob_mark++; 216 break; 217 218 case RED_HARD_MARK: 219 sch->qstats.overlimits++; 220 if (gred_use_harddrop(t) || !gred_use_ecn(t) || 221 !INET_ECN_set_ce(skb)) { 222 q->stats.forced_drop++; 223 goto congestion_drop; 224 } 225 q->stats.forced_mark++; 226 break; 227 } 228 229 if (q->backlog + skb->len <= q->limit) { 230 q->backlog += skb->len; 231 return qdisc_enqueue_tail(skb, sch); 232 } 233 234 q->stats.pdrop++; 235 drop: 236 return qdisc_drop(skb, sch); 237 238 congestion_drop: 239 qdisc_drop(skb, sch); 240 return NET_XMIT_CN; 241 } 242 243 static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) 244 { 245 struct gred_sched *t = qdisc_priv(sch); 246 struct gred_sched_data *q; 247 u16 dp = tc_index_to_dp(skb); 248 249 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 250 if (net_ratelimit()) 251 printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " 252 "for requeue, screwing up backlog.\n", 253 tc_index_to_dp(skb)); 254 } else { 255 if (red_is_idling(&q->parms)) 256 red_end_of_idle_period(&q->parms); 257 q->backlog += skb->len; 258 } 259 260 return qdisc_requeue(skb, sch); 261 } 262 263 static struct sk_buff *gred_dequeue(struct Qdisc* sch) 264 { 265 struct sk_buff *skb; 266 struct gred_sched *t = qdisc_priv(sch); 267 268 skb = qdisc_dequeue_head(sch); 269 270 if (skb) { 271 struct gred_sched_data *q; 272 u16 dp = tc_index_to_dp(skb); 273 274 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 275 if (net_ratelimit()) 276 printk(KERN_WARNING "GRED: Unable to relocate " 277 "VQ 0x%x after dequeue, screwing up " 278 "backlog.\n", tc_index_to_dp(skb)); 279 } else { 280 q->backlog -= skb->len; 281 282 if (!q->backlog && !gred_wred_mode(t)) 283 red_start_of_idle_period(&q->parms); 284 } 285 286 return skb; 287 } 288 289 if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) 290 red_start_of_idle_period(&t->wred_set); 291 292 return NULL; 293 } 294 295 static unsigned int gred_drop(struct Qdisc* sch) 296 { 297 struct sk_buff *skb; 298 struct gred_sched *t = qdisc_priv(sch); 299 300 skb = qdisc_dequeue_tail(sch); 301 if (skb) { 302 unsigned int len = skb->len; 303 struct gred_sched_data *q; 304 u16 dp = tc_index_to_dp(skb); 305 306 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 307 if (net_ratelimit()) 308 printk(KERN_WARNING "GRED: Unable to relocate " 309 "VQ 0x%x while dropping, screwing up " 310 "backlog.\n", tc_index_to_dp(skb)); 311 } else { 312 q->backlog -= len; 313 q->stats.other++; 314 315 if (!q->backlog && !gred_wred_mode(t)) 316 red_start_of_idle_period(&q->parms); 317 } 318 319 qdisc_drop(skb, sch); 320 return len; 321 } 322 323 if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) 324 red_start_of_idle_period(&t->wred_set); 325 326 return 0; 327 328 } 329 330 static void gred_reset(struct Qdisc* sch) 331 { 332 int i; 333 struct gred_sched *t = qdisc_priv(sch); 334 335 qdisc_reset_queue(sch); 336 337 for (i = 0; i < t->DPs; i++) { 338 struct gred_sched_data *q = t->tab[i]; 339 340 if (!q) 341 continue; 342 343 red_restart(&q->parms); 344 q->backlog = 0; 345 } 346 } 347 348 static inline void gred_destroy_vq(struct gred_sched_data *q) 349 { 350 kfree(q); 351 } 352 353 static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) 354 { 355 struct gred_sched *table = qdisc_priv(sch); 356 struct tc_gred_sopt *sopt; 357 int i; 358 359 if (dps == NULL) 360 return -EINVAL; 361 362 sopt = nla_data(dps); 363 364 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) 365 return -EINVAL; 366 367 sch_tree_lock(sch); 368 table->DPs = sopt->DPs; 369 table->def = sopt->def_DP; 370 table->red_flags = sopt->flags; 371 372 /* 373 * Every entry point to GRED is synchronized with the above code 374 * and the DP is checked against DPs, i.e. shadowed VQs can no 375 * longer be found so we can unlock right here. 376 */ 377 sch_tree_unlock(sch); 378 379 if (sopt->grio) { 380 gred_enable_rio_mode(table); 381 gred_disable_wred_mode(table); 382 if (gred_wred_mode_check(sch)) 383 gred_enable_wred_mode(table); 384 } else { 385 gred_disable_rio_mode(table); 386 gred_disable_wred_mode(table); 387 } 388 389 for (i = table->DPs; i < MAX_DPs; i++) { 390 if (table->tab[i]) { 391 printk(KERN_WARNING "GRED: Warning: Destroying " 392 "shadowed VQ 0x%x\n", i); 393 gred_destroy_vq(table->tab[i]); 394 table->tab[i] = NULL; 395 } 396 } 397 398 return 0; 399 } 400 401 static inline int gred_change_vq(struct Qdisc *sch, int dp, 402 struct tc_gred_qopt *ctl, int prio, u8 *stab) 403 { 404 struct gred_sched *table = qdisc_priv(sch); 405 struct gred_sched_data *q; 406 407 if (table->tab[dp] == NULL) { 408 table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL); 409 if (table->tab[dp] == NULL) 410 return -ENOMEM; 411 } 412 413 q = table->tab[dp]; 414 q->DP = dp; 415 q->prio = prio; 416 q->limit = ctl->limit; 417 418 if (q->backlog == 0) 419 red_end_of_idle_period(&q->parms); 420 421 red_set_parms(&q->parms, 422 ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, 423 ctl->Scell_log, stab); 424 425 return 0; 426 } 427 428 static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { 429 [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) }, 430 [TCA_GRED_STAB] = { .len = 256 }, 431 [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, 432 }; 433 434 static int gred_change(struct Qdisc *sch, struct nlattr *opt) 435 { 436 struct gred_sched *table = qdisc_priv(sch); 437 struct tc_gred_qopt *ctl; 438 struct nlattr *tb[TCA_GRED_MAX + 1]; 439 int err, prio = GRED_DEF_PRIO; 440 u8 *stab; 441 442 if (opt == NULL) 443 return -EINVAL; 444 445 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy); 446 if (err < 0) 447 return err; 448 449 if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) 450 return gred_change_table_def(sch, opt); 451 452 if (tb[TCA_GRED_PARMS] == NULL || 453 tb[TCA_GRED_STAB] == NULL) 454 return -EINVAL; 455 456 err = -EINVAL; 457 ctl = nla_data(tb[TCA_GRED_PARMS]); 458 stab = nla_data(tb[TCA_GRED_STAB]); 459 460 if (ctl->DP >= table->DPs) 461 goto errout; 462 463 if (gred_rio_mode(table)) { 464 if (ctl->prio == 0) { 465 int def_prio = GRED_DEF_PRIO; 466 467 if (table->tab[table->def]) 468 def_prio = table->tab[table->def]->prio; 469 470 printk(KERN_DEBUG "GRED: DP %u does not have a prio " 471 "setting default to %d\n", ctl->DP, def_prio); 472 473 prio = def_prio; 474 } else 475 prio = ctl->prio; 476 } 477 478 sch_tree_lock(sch); 479 480 err = gred_change_vq(sch, ctl->DP, ctl, prio, stab); 481 if (err < 0) 482 goto errout_locked; 483 484 if (gred_rio_mode(table)) { 485 gred_disable_wred_mode(table); 486 if (gred_wred_mode_check(sch)) 487 gred_enable_wred_mode(table); 488 } 489 490 err = 0; 491 492 errout_locked: 493 sch_tree_unlock(sch); 494 errout: 495 return err; 496 } 497 498 static int gred_init(struct Qdisc *sch, struct nlattr *opt) 499 { 500 struct nlattr *tb[TCA_GRED_MAX + 1]; 501 int err; 502 503 if (opt == NULL) 504 return -EINVAL; 505 506 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy); 507 if (err < 0) 508 return err; 509 510 if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) 511 return -EINVAL; 512 513 return gred_change_table_def(sch, tb[TCA_GRED_DPS]); 514 } 515 516 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 517 { 518 struct gred_sched *table = qdisc_priv(sch); 519 struct nlattr *parms, *opts = NULL; 520 int i; 521 struct tc_gred_sopt sopt = { 522 .DPs = table->DPs, 523 .def_DP = table->def, 524 .grio = gred_rio_mode(table), 525 .flags = table->red_flags, 526 }; 527 528 opts = nla_nest_start(skb, TCA_OPTIONS); 529 if (opts == NULL) 530 goto nla_put_failure; 531 NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); 532 parms = nla_nest_start(skb, TCA_GRED_PARMS); 533 if (parms == NULL) 534 goto nla_put_failure; 535 536 for (i = 0; i < MAX_DPs; i++) { 537 struct gred_sched_data *q = table->tab[i]; 538 struct tc_gred_qopt opt; 539 540 memset(&opt, 0, sizeof(opt)); 541 542 if (!q) { 543 /* hack -- fix at some point with proper message 544 This is how we indicate to tc that there is no VQ 545 at this DP */ 546 547 opt.DP = MAX_DPs + i; 548 goto append_opt; 549 } 550 551 opt.limit = q->limit; 552 opt.DP = q->DP; 553 opt.backlog = q->backlog; 554 opt.prio = q->prio; 555 opt.qth_min = q->parms.qth_min >> q->parms.Wlog; 556 opt.qth_max = q->parms.qth_max >> q->parms.Wlog; 557 opt.Wlog = q->parms.Wlog; 558 opt.Plog = q->parms.Plog; 559 opt.Scell_log = q->parms.Scell_log; 560 opt.other = q->stats.other; 561 opt.early = q->stats.prob_drop; 562 opt.forced = q->stats.forced_drop; 563 opt.pdrop = q->stats.pdrop; 564 opt.packets = q->packetsin; 565 opt.bytesin = q->bytesin; 566 567 if (gred_wred_mode(table)) { 568 q->parms.qidlestart = 569 table->tab[table->def]->parms.qidlestart; 570 q->parms.qavg = table->tab[table->def]->parms.qavg; 571 } 572 573 opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); 574 575 append_opt: 576 if (nla_append(skb, sizeof(opt), &opt) < 0) 577 goto nla_put_failure; 578 } 579 580 nla_nest_end(skb, parms); 581 582 return nla_nest_end(skb, opts); 583 584 nla_put_failure: 585 return nla_nest_cancel(skb, opts); 586 } 587 588 static void gred_destroy(struct Qdisc *sch) 589 { 590 struct gred_sched *table = qdisc_priv(sch); 591 int i; 592 593 for (i = 0; i < table->DPs; i++) { 594 if (table->tab[i]) 595 gred_destroy_vq(table->tab[i]); 596 } 597 } 598 599 static struct Qdisc_ops gred_qdisc_ops __read_mostly = { 600 .id = "gred", 601 .priv_size = sizeof(struct gred_sched), 602 .enqueue = gred_enqueue, 603 .dequeue = gred_dequeue, 604 .requeue = gred_requeue, 605 .drop = gred_drop, 606 .init = gred_init, 607 .reset = gred_reset, 608 .destroy = gred_destroy, 609 .change = gred_change, 610 .dump = gred_dump, 611 .owner = THIS_MODULE, 612 }; 613 614 static int __init gred_module_init(void) 615 { 616 return register_qdisc(&gred_qdisc_ops); 617 } 618 619 static void __exit gred_module_exit(void) 620 { 621 unregister_qdisc(&gred_qdisc_ops); 622 } 623 624 module_init(gred_module_init) 625 module_exit(gred_module_exit) 626 627 MODULE_LICENSE("GPL"); 628