/*
 * net/sched/sch_gred.c	Generic Random Early Detection queue.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 * 991129:	- Bug fix with grio mode
 *		- a better single AvgQ mode with Grio (WRED)
 *		- A finer grained VQ dequeue based on a suggestion
 *		  from Ren Liu
 *		- More error checks
 *
 * For all the glorious comments look at include/net/red.h
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/red.h>

#define GRED_DEF_PRIO	(MAX_DPs / 2)
#define GRED_VQ_MASK	(MAX_DPs - 1)

struct gred_sched_data;
struct gred_sched;

struct gred_sched_data {
        u32             limit;          /* HARD maximal queue length        */
        u32             DP;             /* the drop parameters              */
        u32             bytesin;        /* bytes seen on virtualQ so far    */
        u32             packetsin;      /* packets seen on virtualQ so far  */
        u32             backlog;        /* bytes on the virtualQ            */
        u8              prio;           /* the prio of this vq              */

        struct red_parms parms;
        struct red_stats stats;
};

enum {
        GRED_WRED_MODE = 1,
        GRED_RIO_MODE,
};

struct gred_sched {
        struct gred_sched_data *tab[MAX_DPs];
        unsigned long   flags;
        u32             red_flags;
        u32             DPs;
        u32             def;
        struct red_parms wred_set;
};

static inline int gred_wred_mode(struct gred_sched *table)
{
        return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
        __set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
        __clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
        return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
        __set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
        __clear_bit(GRED_RIO_MODE, &table->flags);
}
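
/*
 * Mode overview: in plain RIO/GRIO mode every virtual queue (VQ) keeps
 * its own average queue length, and the averages of all VQs with a
 * numerically lower prio are added in before the RED decision is taken
 * (see gred_enqueue()).  WRED mode is layered on top of that whenever at
 * least two VQs share the same prio, which is what gred_wred_mode_check()
 * below detects.  In WRED mode all VQs share a single average (wred_set)
 * and the overall qdisc backlog instead of their per-VQ values.
 */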

static inline int gred_wred_mode_check(struct Qdisc *sch)
{
        struct gred_sched *table = qdisc_priv(sch);
        int i;

        /* Really ugly O(n^2), but it shouldn't be needed too frequently. */
        for (i = 0; i < table->DPs; i++) {
                struct gred_sched_data *q = table->tab[i];
                int n;

                if (q == NULL)
                        continue;

                for (n = 0; n < table->DPs; n++)
                        if (table->tab[n] && table->tab[n] != q &&
                            table->tab[n]->prio == q->prio)
                                return 1;
        }

        return 0;
}

static inline unsigned int gred_backlog(struct gred_sched *table,
                                        struct gred_sched_data *q,
                                        struct Qdisc *sch)
{
        if (gred_wred_mode(table))
                return sch->qstats.backlog;
        else
                return q->backlog;
}

static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
        return skb->tc_index & GRED_VQ_MASK;
}

static inline void gred_load_wred_set(struct gred_sched *table,
                                      struct gred_sched_data *q)
{
        q->parms.qavg = table->wred_set.qavg;
        q->parms.qidlestart = table->wred_set.qidlestart;
}

static inline void gred_store_wred_set(struct gred_sched *table,
                                       struct gred_sched_data *q)
{
        table->wred_set.qavg = q->parms.qavg;
}

static inline int gred_use_ecn(struct gred_sched *t)
{
        return t->red_flags & TC_RED_ECN;
}

static inline int gred_use_harddrop(struct gred_sched *t)
{
        return t->red_flags & TC_RED_HARDDROP;
}

static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct gred_sched_data *q = NULL;
        struct gred_sched *t = qdisc_priv(sch);
        unsigned long qavg = 0;
        u16 dp = tc_index_to_dp(skb);

        if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                dp = t->def;

                q = t->tab[dp];
                if (!q) {
                        /* Pass through packets not assigned to a DP
                         * if no default DP has been configured. This
                         * allows for DP flows to be left untouched.
                         */
                        if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
                                return qdisc_enqueue_tail(skb, sch);
                        else
                                goto drop;
                }

                /* Fix tc_index? Could be controversial but needed for
                 * requeueing. */
                skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
        }
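
        /* GRIO accumulation, for illustration: with VQs at prio 1, 2 and 3,
         * a packet mapped to the prio 3 VQ is judged against the sum of the
         * prio 1 and prio 2 averages plus its own, so load on lower-numbered
         * prios pushes higher-numbered prios towards earlier drops.  Idling
         * VQs are skipped; the loop below is the authoritative behaviour.
         */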
        /* Sum up the averages of all VQs with a prio below ours to get the
         * effective average for the RED decision. */
        if (!gred_wred_mode(t) && gred_rio_mode(t)) {
                int i;

                for (i = 0; i < t->DPs; i++) {
                        if (t->tab[i] && t->tab[i]->prio < q->prio &&
                            !red_is_idling(&t->tab[i]->parms))
                                qavg += t->tab[i]->parms.qavg;
                }
        }

        q->packetsin++;
        q->bytesin += qdisc_pkt_len(skb);

        if (gred_wred_mode(t))
                gred_load_wred_set(t, q);

        q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));

        if (red_is_idling(&q->parms))
                red_end_of_idle_period(&q->parms);

        if (gred_wred_mode(t))
                gred_store_wred_set(t, q);

        switch (red_action(&q->parms, q->parms.qavg + qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                sch->qstats.overlimits++;
                if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                q->stats.prob_mark++;
                break;

        case RED_HARD_MARK:
                sch->qstats.overlimits++;
                if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
                    !INET_ECN_set_ce(skb)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }
                q->stats.forced_mark++;
                break;
        }

        if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
                q->backlog += qdisc_pkt_len(skb);
                return qdisc_enqueue_tail(skb, sch);
        }

        q->stats.pdrop++;
drop:
        return qdisc_drop(skb, sch);

congestion_drop:
        qdisc_drop(skb, sch);
        return NET_XMIT_CN;
}

static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct gred_sched *t = qdisc_priv(sch);

        skb = qdisc_dequeue_head(sch);

        if (skb) {
                struct gred_sched_data *q;
                u16 dp = tc_index_to_dp(skb);

                if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                        if (net_ratelimit())
                                pr_warning("GRED: Unable to relocate VQ 0x%x "
                                           "after dequeue, screwing up "
                                           "backlog.\n", tc_index_to_dp(skb));
                } else {
                        q->backlog -= qdisc_pkt_len(skb);

                        if (!q->backlog && !gred_wred_mode(t))
                                red_start_of_idle_period(&q->parms);
                }

                return skb;
        }

        if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
                red_start_of_idle_period(&t->wred_set);

        return NULL;
}

static unsigned int gred_drop(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct gred_sched *t = qdisc_priv(sch);

        skb = qdisc_dequeue_tail(sch);
        if (skb) {
                unsigned int len = qdisc_pkt_len(skb);
                struct gred_sched_data *q;
                u16 dp = tc_index_to_dp(skb);

                if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                        if (net_ratelimit())
                                pr_warning("GRED: Unable to relocate VQ 0x%x "
                                           "while dropping, screwing up "
                                           "backlog.\n", tc_index_to_dp(skb));
                } else {
                        q->backlog -= len;
                        q->stats.other++;

                        if (!q->backlog && !gred_wred_mode(t))
                                red_start_of_idle_period(&q->parms);
                }

                qdisc_drop(skb, sch);
                return len;
        }

        if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
                red_start_of_idle_period(&t->wred_set);

        return 0;
}

static void gred_reset(struct Qdisc *sch)
{
        int i;
        struct gred_sched *t = qdisc_priv(sch);

        qdisc_reset_queue(sch);

        for (i = 0; i < t->DPs; i++) {
                struct gred_sched_data *q = t->tab[i];

                if (!q)
                        continue;

                red_restart(&q->parms);
                q->backlog = 0;
        }
}
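
/*
 * Configuration happens in two steps (gred_init()/gred_change() below):
 * first the table itself is set up (number of DPs, default DP, grio flag,
 * red_flags), then each virtual queue gets its RED parameters.  With the
 * tc(8) front end this looks roughly like the sketch below; the syntax is
 * illustrative only, see tc-gred(8) for the authoritative form:
 *
 *      tc qdisc add dev eth0 root gred setup DPs 3 default 2 grio
 *      tc qdisc change dev eth0 root gred DP 0 prio 2 limit 60KB \
 *              min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit \
 *              probability 0.02
 */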

static inline void gred_destroy_vq(struct gred_sched_data *q)
{
        kfree(q);
}

static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
{
        struct gred_sched *table = qdisc_priv(sch);
        struct tc_gred_sopt *sopt;
        int i;

        if (dps == NULL)
                return -EINVAL;

        sopt = nla_data(dps);

        if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
                return -EINVAL;

        sch_tree_lock(sch);
        table->DPs = sopt->DPs;
        table->def = sopt->def_DP;
        table->red_flags = sopt->flags;

        /*
         * Every entry point to GRED is synchronized with the above code
         * and the DP is checked against DPs, i.e. shadowed VQs can no
         * longer be found so we can unlock right here.
         */
        sch_tree_unlock(sch);

        if (sopt->grio) {
                gred_enable_rio_mode(table);
                gred_disable_wred_mode(table);
                if (gred_wred_mode_check(sch))
                        gred_enable_wred_mode(table);
        } else {
                gred_disable_rio_mode(table);
                gred_disable_wred_mode(table);
        }

        for (i = table->DPs; i < MAX_DPs; i++) {
                if (table->tab[i]) {
                        pr_warning("GRED: Warning: Destroying "
                                   "shadowed VQ 0x%x\n", i);
                        gred_destroy_vq(table->tab[i]);
                        table->tab[i] = NULL;
                }
        }

        return 0;
}

static inline int gred_change_vq(struct Qdisc *sch, int dp,
                                 struct tc_gred_qopt *ctl, int prio, u8 *stab)
{
        struct gred_sched *table = qdisc_priv(sch);
        struct gred_sched_data *q;

        if (table->tab[dp] == NULL) {
                table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
                if (table->tab[dp] == NULL)
                        return -ENOMEM;
        }

        q = table->tab[dp];
        q->DP = dp;
        q->prio = prio;
        q->limit = ctl->limit;

        if (q->backlog == 0)
                red_end_of_idle_period(&q->parms);

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
                      ctl->Scell_log, stab);

        return 0;
}

static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
        [TCA_GRED_PARMS]        = { .len = sizeof(struct tc_gred_qopt) },
        [TCA_GRED_STAB]         = { .len = 256 },
        [TCA_GRED_DPS]          = { .len = sizeof(struct tc_gred_sopt) },
};

static int gred_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct gred_sched *table = qdisc_priv(sch);
        struct tc_gred_qopt *ctl;
        struct nlattr *tb[TCA_GRED_MAX + 1];
        int err, prio = GRED_DEF_PRIO;
        u8 *stab;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
        if (err < 0)
                return err;

        if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
                return gred_change_table_def(sch, tb[TCA_GRED_DPS]);

        if (tb[TCA_GRED_PARMS] == NULL ||
            tb[TCA_GRED_STAB] == NULL)
                return -EINVAL;

        err = -EINVAL;
        ctl = nla_data(tb[TCA_GRED_PARMS]);
        stab = nla_data(tb[TCA_GRED_STAB]);

        if (ctl->DP >= table->DPs)
                goto errout;

        if (gred_rio_mode(table)) {
                if (ctl->prio == 0) {
                        int def_prio = GRED_DEF_PRIO;

                        if (table->tab[table->def])
                                def_prio = table->tab[table->def]->prio;

                        printk(KERN_DEBUG "GRED: DP %u does not have a prio "
                               "setting default to %d\n", ctl->DP, def_prio);

                        prio = def_prio;
                } else
                        prio = ctl->prio;
        }

        sch_tree_lock(sch);

        err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
        if (err < 0)
                goto errout_locked;

        if (gred_rio_mode(table)) {
                gred_disable_wred_mode(table);
                if (gred_wred_mode_check(sch))
                        gred_enable_wred_mode(table);
        }

        err = 0;

errout_locked:
        sch_tree_unlock(sch);
errout:
        return err;
}
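
/*
 * The per-VQ values handed to red_set_parms() above follow classic RED:
 * the average queue length is an EWMA of the backlog, roughly
 *
 *      qavg = (1 - 2^-Wlog) * qavg + 2^-Wlog * backlog
 *
 * and packets are marked or dropped with a probability that ramps up
 * (scaled via Plog) once qavg crosses qth_min, and unconditionally once
 * it exceeds qth_max.  The fixed-point arithmetic and the idle-time
 * correction based on Scell_log and the stab table live in
 * include/net/red.h.
 */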

static int gred_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct nlattr *tb[TCA_GRED_MAX + 1];
        int err;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
        if (err < 0)
                return err;

        if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
                return -EINVAL;

        return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}

static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct gred_sched *table = qdisc_priv(sch);
        struct nlattr *parms, *opts = NULL;
        int i;
        struct tc_gred_sopt sopt = {
                .DPs    = table->DPs,
                .def_DP = table->def,
                .grio   = gred_rio_mode(table),
                .flags  = table->red_flags,
        };

        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
        parms = nla_nest_start(skb, TCA_GRED_PARMS);
        if (parms == NULL)
                goto nla_put_failure;

        for (i = 0; i < MAX_DPs; i++) {
                struct gred_sched_data *q = table->tab[i];
                struct tc_gred_qopt opt;

                memset(&opt, 0, sizeof(opt));

                if (!q) {
                        /* hack -- fix at some point with proper message.
                         * This is how we indicate to tc that there is no VQ
                         * at this DP.
                         */
                        opt.DP = MAX_DPs + i;
                        goto append_opt;
                }

                opt.limit       = q->limit;
                opt.DP          = q->DP;
                opt.backlog     = q->backlog;
                opt.prio        = q->prio;
                opt.qth_min     = q->parms.qth_min >> q->parms.Wlog;
                opt.qth_max     = q->parms.qth_max >> q->parms.Wlog;
                opt.Wlog        = q->parms.Wlog;
                opt.Plog        = q->parms.Plog;
                opt.Scell_log   = q->parms.Scell_log;
                opt.other       = q->stats.other;
                opt.early       = q->stats.prob_drop;
                opt.forced      = q->stats.forced_drop;
                opt.pdrop       = q->stats.pdrop;
                opt.packets     = q->packetsin;
                opt.bytesin     = q->bytesin;

                if (gred_wred_mode(table)) {
                        q->parms.qidlestart =
                                table->tab[table->def]->parms.qidlestart;
                        q->parms.qavg = table->tab[table->def]->parms.qavg;
                }

                opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);

append_opt:
                if (nla_append(skb, sizeof(opt), &opt) < 0)
                        goto nla_put_failure;
        }

        nla_nest_end(skb, parms);

        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static void gred_destroy(struct Qdisc *sch)
{
        struct gred_sched *table = qdisc_priv(sch);
        int i;

        for (i = 0; i < table->DPs; i++) {
                if (table->tab[i])
                        gred_destroy_vq(table->tab[i]);
        }
}

static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
        .id             = "gred",
        .priv_size      = sizeof(struct gred_sched),
        .enqueue        = gred_enqueue,
        .dequeue        = gred_dequeue,
        .peek           = qdisc_peek_head,
        .drop           = gred_drop,
        .init           = gred_init,
        .reset          = gred_reset,
        .destroy        = gred_destroy,
        .change         = gred_change,
        .dump           = gred_dump,
        .owner          = THIS_MODULE,
};

static int __init gred_module_init(void)
{
        return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
        unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");