/*
 * net/sched/sch_gred.c	Generic Random Early Detection queue.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 * 991129: -  Bug fix with grio mode
 *	   - a better single AvgQ mode with Grio (WRED)
 *	   - A finer grained VQ dequeue based on a suggestion
 *	     from Ren Liu
 *	   - More error checks
 *
 * For all the glorious comments look at include/net/red.h
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/red.h>

#define GRED_DEF_PRIO	(MAX_DPs / 2)
#define GRED_VQ_MASK	(MAX_DPs - 1)

struct gred_sched_data;
struct gred_sched;

struct gred_sched_data {
	u32		limit;		/* HARD maximal queue length	*/
	u32		DP;		/* the drop parameters */
	u32		bytesin;	/* bytes seen on virtualQ so far */
	u32		packetsin;	/* packets seen on virtualQ so far */
	u32		backlog;	/* bytes on the virtualQ */
	u8		prio;		/* the prio of this vq */

	struct red_parms parms;
	struct red_vars  vars;
	struct red_stats stats;
};

enum {
	GRED_WRED_MODE = 1,
	GRED_RIO_MODE,
};

struct gred_sched {
	struct gred_sched_data *tab[MAX_DPs];
	unsigned long	flags;
	u32		red_flags;
	u32		DPs;
	u32		def;
	struct red_vars wred_set;
};

static inline int gred_wred_mode(struct gred_sched *table)
{
	return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
	__set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	__clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
	return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
	__set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
	__clear_bit(GRED_RIO_MODE, &table->flags);
}
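/* GRED couples the per-VQ RED instances in one of two ways:
 *
 *  - RIO mode (grio): every virtual queue keeps its own average queue
 *    length and, at enqueue time, the averages of all VQs with a
 *    numerically lower prio are added in before the RED decision.
 *  - WRED mode: entered when at least two VQs share the same prio; a
 *    single average queue length (wred_set) is then shared by all VQs.
 *
 * gred_wred_mode_check() below detects the shared-prio condition; it is
 * re-run whenever the table definition or a VQ's prio changes.
 */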
static inline int gred_wred_mode_check(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	/* Really ugly O(n^2), but it should not need to run too frequently. */
	for (i = 0; i < table->DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		int n;

		if (q == NULL)
			continue;

		for (n = 0; n < table->DPs; n++)
			if (table->tab[n] && table->tab[n] != q &&
			    table->tab[n]->prio == q->prio)
				return 1;
	}

	return 0;
}

static inline unsigned int gred_backlog(struct gred_sched *table,
					struct gred_sched_data *q,
					struct Qdisc *sch)
{
	if (gred_wred_mode(table))
		return sch->qstats.backlog;
	else
		return q->backlog;
}

static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
	return skb->tc_index & GRED_VQ_MASK;
}

static inline void gred_load_wred_set(const struct gred_sched *table,
				      struct gred_sched_data *q)
{
	q->vars.qavg = table->wred_set.qavg;
	q->vars.qidlestart = table->wred_set.qidlestart;
}

static inline void gred_store_wred_set(struct gred_sched *table,
				       struct gred_sched_data *q)
{
	table->wred_set.qavg = q->vars.qavg;
}

static inline int gred_use_ecn(struct gred_sched *t)
{
	return t->red_flags & TC_RED_ECN;
}

static inline int gred_use_harddrop(struct gred_sched *t)
{
	return t->red_flags & TC_RED_HARDDROP;
}
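/* Enqueue: the low bits of skb->tc_index select the virtual queue
 * (tc_index is typically set up by the dsmark qdisc or a classifier such
 * as cls_tcindex); packets that do not map to a configured VQ fall back
 * to the default DP.  The VQ's average queue length is updated via
 * red_calc_qavg() -- which keeps the average in fixed point, scaled by
 * 2^Wlog, see include/net/red.h -- and red_action() then returns
 * RED_DONT_MARK, RED_PROB_MARK or RED_HARD_MARK.
 */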
static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct gred_sched_data *q = NULL;
	struct gred_sched *t = qdisc_priv(sch);
	unsigned long qavg = 0;
	u16 dp = tc_index_to_dp(skb);

	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
		dp = t->def;

		q = t->tab[dp];
		if (!q) {
			/* Pass through packets not assigned to a DP
			 * if no default DP has been configured.  This
			 * allows DP flows to be left untouched.
			 */
			if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
				return qdisc_enqueue_tail(skb, sch);
			else
				goto drop;
		}

		/* Fix tc_index? --could be controversial but needed for
		 * requeueing.
		 */
		skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
	}

	/* Sum up the qavg of all VQs with a numerically lower prio than
	 * ours to get the new qavg.
	 */
	if (!gred_wred_mode(t) && gred_rio_mode(t)) {
		int i;

		for (i = 0; i < t->DPs; i++) {
			if (t->tab[i] && t->tab[i]->prio < q->prio &&
			    !red_is_idling(&t->tab[i]->vars))
				qavg += t->tab[i]->vars.qavg;
		}
	}

	q->packetsin++;
	q->bytesin += qdisc_pkt_len(skb);

	if (gred_wred_mode(t))
		gred_load_wred_set(t, q);

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     gred_backlog(t, q, sch));

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	if (gred_wred_mode(t))
		gred_store_wred_set(t, q);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		sch->qstats.overlimits++;
		if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		sch->qstats.overlimits++;
		if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}
		q->stats.forced_mark++;
		break;
	}

	if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
		q->backlog += qdisc_pkt_len(skb);
		return qdisc_enqueue_tail(skb, sch);
	}

	q->stats.pdrop++;
drop:
	return qdisc_drop(skb, sch);

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;
}

static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_head(sch);

	if (skb) {
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n",
					     tc_index_to_dp(skb));
		} else {
			q->backlog -= qdisc_pkt_len(skb);

			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->vars);
		}

		return skb;
	}

	if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
		red_start_of_idle_period(&t->wred_set);

	return NULL;
}

static unsigned int gred_drop(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_tail(sch);
	if (skb) {
		unsigned int len = qdisc_pkt_len(skb);
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x while dropping, screwing up backlog\n",
					     tc_index_to_dp(skb));
		} else {
			q->backlog -= len;
			q->stats.other++;

			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->vars);
		}

		qdisc_drop(skb, sch);
		return len;
	}

	if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
		red_start_of_idle_period(&t->wred_set);

	return 0;
}

static void gred_reset(struct Qdisc *sch)
{
	int i;
	struct gred_sched *t = qdisc_priv(sch);

	qdisc_reset_queue(sch);

	for (i = 0; i < t->DPs; i++) {
		struct gred_sched_data *q = t->tab[i];

		if (!q)
			continue;

		red_restart(&q->vars);
		q->backlog = 0;
	}
}
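/* Virtual queues are allocated on demand in gred_change_vq() (using the
 * prealloc buffer handed down from gred_change()) and released through
 * gred_destroy_vq(): either in gred_change_table_def() when a smaller
 * table leaves them shadowed, or in gred_destroy() when the qdisc goes
 * away.
 */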
static inline void gred_destroy_vq(struct gred_sched_data *q)
{
	kfree(q);
}

static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_sopt *sopt;
	int i;

	if (dps == NULL)
		return -EINVAL;

	sopt = nla_data(dps);

	if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
		return -EINVAL;

	sch_tree_lock(sch);
	table->DPs = sopt->DPs;
	table->def = sopt->def_DP;
	table->red_flags = sopt->flags;

	/*
	 * Every entry point to GRED is synchronized with the above code
	 * and the DP is checked against DPs, i.e. shadowed VQs can no
	 * longer be found, so we can unlock right here.
	 */
	sch_tree_unlock(sch);

	if (sopt->grio) {
		gred_enable_rio_mode(table);
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	} else {
		gred_disable_rio_mode(table);
		gred_disable_wred_mode(table);
	}

	for (i = table->DPs; i < MAX_DPs; i++) {
		if (table->tab[i]) {
			pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
				i);
			gred_destroy_vq(table->tab[i]);
			table->tab[i] = NULL;
		}
	}

	return 0;
}

static inline int gred_change_vq(struct Qdisc *sch, int dp,
				 struct tc_gred_qopt *ctl, int prio,
				 u8 *stab, u32 max_P,
				 struct gred_sched_data **prealloc)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q = table->tab[dp];

	if (!q) {
		table->tab[dp] = q = *prealloc;
		*prealloc = NULL;
		if (!q)
			return -ENOMEM;
	}

	q->DP = dp;
	q->prio = prio;
	q->limit = ctl->limit;

	if (q->backlog == 0)
		red_end_of_idle_period(&q->vars);

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
		      ctl->Scell_log, stab, max_P);
	red_set_vars(&q->vars);
	return 0;
}

static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
	[TCA_GRED_STAB]		= { .len = 256 },
	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
};
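/* Configuration is a two-step process: the table itself (number of DPs,
 * default DP, grio and the RED flags) is set up via TCA_GRED_DPS, and the
 * per-VQ RED parameters follow via TCA_GRED_PARMS + TCA_GRED_STAB.  With
 * iproute2 this roughly corresponds to (illustrative only, see the tc-gred
 * documentation for the exact syntax):
 *
 *	tc qdisc add dev eth0 root gred setup DPs 4 default 1 grio
 *	tc qdisc change dev eth0 root gred limit 60KB min 15KB max 45KB \
 *		burst 20 avpkt 1000 bandwidth 10Mbit DP 0 probability 0.02 prio 2
 */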
static int gred_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt *ctl;
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err, prio = GRED_DEF_PRIO;
	u8 *stab;
	u32 max_P;
	struct gred_sched_data *prealloc;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
		/* Table setup only: hand gred_change_table_def() the
		 * TCA_GRED_DPS attribute, not the whole options blob.
		 */
		return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
	}

	if (tb[TCA_GRED_PARMS] == NULL ||
	    tb[TCA_GRED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;

	err = -EINVAL;
	ctl = nla_data(tb[TCA_GRED_PARMS]);
	stab = nla_data(tb[TCA_GRED_STAB]);

	if (ctl->DP >= table->DPs)
		goto errout;

	if (gred_rio_mode(table)) {
		if (ctl->prio == 0) {
			int def_prio = GRED_DEF_PRIO;

			if (table->tab[table->def])
				def_prio = table->tab[table->def]->prio;

			printk(KERN_DEBUG "GRED: DP %u does not have a prio, setting default to %d\n",
			       ctl->DP, def_prio);

			prio = def_prio;
		} else
			prio = ctl->prio;
	}

	prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
	sch_tree_lock(sch);

	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
	if (err < 0)
		goto errout_locked;

	if (gred_rio_mode(table)) {
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	}

	err = 0;

errout_locked:
	sch_tree_unlock(sch);
	kfree(prealloc);
errout:
	return err;
}

static int gred_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
		return -EINVAL;

	return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
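/* Dump format: a tc_gred_sopt (TCA_GRED_DPS), an array of MAX_DPs max_P
 * values (TCA_GRED_MAX_P) and TCA_GRED_PARMS carrying one tc_gred_qopt
 * per possible DP.  Unused slots are flagged by setting opt.DP to
 * MAX_DPs + i (see the "hack" note below).  qth_min/qth_max are shifted
 * right by Wlog because red_set_parms() stores them pre-scaled.
 */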
static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct nlattr *parms, *opts = NULL;
	int i;
	u32 max_p[MAX_DPs];
	struct tc_gred_sopt sopt = {
		.DPs	= table->DPs,
		.def_DP	= table->def,
		.grio	= gred_rio_mode(table),
		.flags	= table->red_flags,
	};

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt))
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];

		max_p[i] = q ? q->parms.max_P : 0;
	}
	if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
		goto nla_put_failure;

	parms = nla_nest_start(skb, TCA_GRED_PARMS);
	if (parms == NULL)
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct tc_gred_qopt opt;

		memset(&opt, 0, sizeof(opt));

		if (!q) {
			/* hack -- fix at some point with a proper message.
			 * This is how we indicate to tc that there is no VQ
			 * at this DP.
			 */
			opt.DP = MAX_DPs + i;
			goto append_opt;
		}

		opt.limit	= q->limit;
		opt.DP		= q->DP;
		opt.backlog	= q->backlog;
		opt.prio	= q->prio;
		opt.qth_min	= q->parms.qth_min >> q->parms.Wlog;
		opt.qth_max	= q->parms.qth_max >> q->parms.Wlog;
		opt.Wlog	= q->parms.Wlog;
		opt.Plog	= q->parms.Plog;
		opt.Scell_log	= q->parms.Scell_log;
		opt.other	= q->stats.other;
		opt.early	= q->stats.prob_drop;
		opt.forced	= q->stats.forced_drop;
		opt.pdrop	= q->stats.pdrop;
		opt.packets	= q->packetsin;
		opt.bytesin	= q->bytesin;

		if (gred_wred_mode(table))
			gred_load_wred_set(table, q);

		opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg);

append_opt:
		if (nla_append(skb, sizeof(opt), &opt) < 0)
			goto nla_put_failure;
	}

	nla_nest_end(skb, parms);

	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static void gred_destroy(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	for (i = 0; i < table->DPs; i++) {
		if (table->tab[i])
			gred_destroy_vq(table->tab[i]);
	}
}

static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
	.id		=	"gred",
	.priv_size	=	sizeof(struct gred_sched),
	.enqueue	=	gred_enqueue,
	.dequeue	=	gred_dequeue,
	.peek		=	qdisc_peek_head,
	.drop		=	gred_drop,
	.init		=	gred_init,
	.reset		=	gred_reset,
	.destroy	=	gred_destroy,
	.change		=	gred_change,
	.dump		=	gred_dump,
	.owner		=	THIS_MODULE,
};

static int __init gred_module_init(void)
{
	return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
	unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");