1 /* 2 * net/sched/sch_gred.c Generic Random Early Detection queue. 3 * 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License 7 * as published by the Free Software Foundation; either version 8 * 2 of the License, or (at your option) any later version. 9 * 10 * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 11 * 12 * 991129: - Bug fix with grio mode 13 * - a better sing. AvgQ mode with Grio(WRED) 14 * - A finer grained VQ dequeue based on sugestion 15 * from Ren Liu 16 * - More error checks 17 * 18 * For all the glorious comments look at include/net/red.h 19 */ 20 21 #include <linux/slab.h> 22 #include <linux/module.h> 23 #include <linux/types.h> 24 #include <linux/kernel.h> 25 #include <linux/skbuff.h> 26 #include <net/pkt_sched.h> 27 #include <net/red.h> 28 29 #define GRED_DEF_PRIO (MAX_DPs / 2) 30 #define GRED_VQ_MASK (MAX_DPs - 1) 31 32 struct gred_sched_data; 33 struct gred_sched; 34 35 struct gred_sched_data { 36 u32 limit; /* HARD maximal queue length */ 37 u32 DP; /* the drop parameters */ 38 u32 bytesin; /* bytes seen on virtualQ so far*/ 39 u32 packetsin; /* packets seen on virtualQ so far*/ 40 u32 backlog; /* bytes on the virtualQ */ 41 u8 prio; /* the prio of this vq */ 42 43 struct red_parms parms; 44 struct red_vars vars; 45 struct red_stats stats; 46 }; 47 48 enum { 49 GRED_WRED_MODE = 1, 50 GRED_RIO_MODE, 51 }; 52 53 struct gred_sched { 54 struct gred_sched_data *tab[MAX_DPs]; 55 unsigned long flags; 56 u32 red_flags; 57 u32 DPs; 58 u32 def; 59 struct red_vars wred_set; 60 }; 61 62 static inline int gred_wred_mode(struct gred_sched *table) 63 { 64 return test_bit(GRED_WRED_MODE, &table->flags); 65 } 66 67 static inline void gred_enable_wred_mode(struct gred_sched *table) 68 { 69 __set_bit(GRED_WRED_MODE, &table->flags); 70 } 71 72 static inline void gred_disable_wred_mode(struct gred_sched *table) 73 { 74 __clear_bit(GRED_WRED_MODE, &table->flags); 75 } 76 77 static inline int gred_rio_mode(struct gred_sched *table) 78 { 79 return test_bit(GRED_RIO_MODE, &table->flags); 80 } 81 82 static inline void gred_enable_rio_mode(struct gred_sched *table) 83 { 84 __set_bit(GRED_RIO_MODE, &table->flags); 85 } 86 87 static inline void gred_disable_rio_mode(struct gred_sched *table) 88 { 89 __clear_bit(GRED_RIO_MODE, &table->flags); 90 } 91 92 static inline int gred_wred_mode_check(struct Qdisc *sch) 93 { 94 struct gred_sched *table = qdisc_priv(sch); 95 int i; 96 97 /* Really ugly O(n^2) but shouldn't be necessary too frequent. */ 98 for (i = 0; i < table->DPs; i++) { 99 struct gred_sched_data *q = table->tab[i]; 100 int n; 101 102 if (q == NULL) 103 continue; 104 105 for (n = i + 1; n < table->DPs; n++) 106 if (table->tab[n] && table->tab[n]->prio == q->prio) 107 return 1; 108 } 109 110 return 0; 111 } 112 113 static inline unsigned int gred_backlog(struct gred_sched *table, 114 struct gred_sched_data *q, 115 struct Qdisc *sch) 116 { 117 if (gred_wred_mode(table)) 118 return sch->qstats.backlog; 119 else 120 return q->backlog; 121 } 122 123 static inline u16 tc_index_to_dp(struct sk_buff *skb) 124 { 125 return skb->tc_index & GRED_VQ_MASK; 126 } 127 128 static inline void gred_load_wred_set(const struct gred_sched *table, 129 struct gred_sched_data *q) 130 { 131 q->vars.qavg = table->wred_set.qavg; 132 q->vars.qidlestart = table->wred_set.qidlestart; 133 } 134 135 static inline void gred_store_wred_set(struct gred_sched *table, 136 struct gred_sched_data *q) 137 { 138 table->wred_set.qavg = q->vars.qavg; 139 table->wred_set.qidlestart = q->vars.qidlestart; 140 } 141 142 static inline int gred_use_ecn(struct gred_sched *t) 143 { 144 return t->red_flags & TC_RED_ECN; 145 } 146 147 static inline int gred_use_harddrop(struct gred_sched *t) 148 { 149 return t->red_flags & TC_RED_HARDDROP; 150 } 151 152 static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, 153 struct sk_buff **to_free) 154 { 155 struct gred_sched_data *q = NULL; 156 struct gred_sched *t = qdisc_priv(sch); 157 unsigned long qavg = 0; 158 u16 dp = tc_index_to_dp(skb); 159 160 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 161 dp = t->def; 162 163 q = t->tab[dp]; 164 if (!q) { 165 /* Pass through packets not assigned to a DP 166 * if no default DP has been configured. This 167 * allows for DP flows to be left untouched. 168 */ 169 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= 170 sch->limit)) 171 return qdisc_enqueue_tail(skb, sch); 172 else 173 goto drop; 174 } 175 176 /* fix tc_index? --could be controversial but needed for 177 requeueing */ 178 skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; 179 } 180 181 /* sum up all the qaves of prios < ours to get the new qave */ 182 if (!gred_wred_mode(t) && gred_rio_mode(t)) { 183 int i; 184 185 for (i = 0; i < t->DPs; i++) { 186 if (t->tab[i] && t->tab[i]->prio < q->prio && 187 !red_is_idling(&t->tab[i]->vars)) 188 qavg += t->tab[i]->vars.qavg; 189 } 190 191 } 192 193 q->packetsin++; 194 q->bytesin += qdisc_pkt_len(skb); 195 196 if (gred_wred_mode(t)) 197 gred_load_wred_set(t, q); 198 199 q->vars.qavg = red_calc_qavg(&q->parms, 200 &q->vars, 201 gred_backlog(t, q, sch)); 202 203 if (red_is_idling(&q->vars)) 204 red_end_of_idle_period(&q->vars); 205 206 if (gred_wred_mode(t)) 207 gred_store_wred_set(t, q); 208 209 switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) { 210 case RED_DONT_MARK: 211 break; 212 213 case RED_PROB_MARK: 214 qdisc_qstats_overlimit(sch); 215 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { 216 q->stats.prob_drop++; 217 goto congestion_drop; 218 } 219 220 q->stats.prob_mark++; 221 break; 222 223 case RED_HARD_MARK: 224 qdisc_qstats_overlimit(sch); 225 if (gred_use_harddrop(t) || !gred_use_ecn(t) || 226 !INET_ECN_set_ce(skb)) { 227 q->stats.forced_drop++; 228 goto congestion_drop; 229 } 230 q->stats.forced_mark++; 231 break; 232 } 233 234 if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) { 235 q->backlog += qdisc_pkt_len(skb); 236 return qdisc_enqueue_tail(skb, sch); 237 } 238 239 q->stats.pdrop++; 240 drop: 241 return qdisc_drop(skb, sch, to_free); 242 243 congestion_drop: 244 qdisc_drop(skb, sch, to_free); 245 return NET_XMIT_CN; 246 } 247 248 static struct sk_buff *gred_dequeue(struct Qdisc *sch) 249 { 250 struct sk_buff *skb; 251 struct gred_sched *t = qdisc_priv(sch); 252 253 skb = qdisc_dequeue_head(sch); 254 255 if (skb) { 256 struct gred_sched_data *q; 257 u16 dp = tc_index_to_dp(skb); 258 259 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 260 net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n", 261 tc_index_to_dp(skb)); 262 } else { 263 q->backlog -= qdisc_pkt_len(skb); 264 265 if (gred_wred_mode(t)) { 266 if (!sch->qstats.backlog) 267 red_start_of_idle_period(&t->wred_set); 268 } else { 269 if (!q->backlog) 270 red_start_of_idle_period(&q->vars); 271 } 272 } 273 274 return skb; 275 } 276 277 return NULL; 278 } 279 280 static void gred_reset(struct Qdisc *sch) 281 { 282 int i; 283 struct gred_sched *t = qdisc_priv(sch); 284 285 qdisc_reset_queue(sch); 286 287 for (i = 0; i < t->DPs; i++) { 288 struct gred_sched_data *q = t->tab[i]; 289 290 if (!q) 291 continue; 292 293 red_restart(&q->vars); 294 q->backlog = 0; 295 } 296 } 297 298 static inline void gred_destroy_vq(struct gred_sched_data *q) 299 { 300 kfree(q); 301 } 302 303 static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) 304 { 305 struct gred_sched *table = qdisc_priv(sch); 306 struct tc_gred_sopt *sopt; 307 int i; 308 309 if (!dps) 310 return -EINVAL; 311 312 sopt = nla_data(dps); 313 314 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || 315 sopt->def_DP >= sopt->DPs) 316 return -EINVAL; 317 318 sch_tree_lock(sch); 319 table->DPs = sopt->DPs; 320 table->def = sopt->def_DP; 321 table->red_flags = sopt->flags; 322 323 /* 324 * Every entry point to GRED is synchronized with the above code 325 * and the DP is checked against DPs, i.e. shadowed VQs can no 326 * longer be found so we can unlock right here. 327 */ 328 sch_tree_unlock(sch); 329 330 if (sopt->grio) { 331 gred_enable_rio_mode(table); 332 gred_disable_wred_mode(table); 333 if (gred_wred_mode_check(sch)) 334 gred_enable_wred_mode(table); 335 } else { 336 gred_disable_rio_mode(table); 337 gred_disable_wred_mode(table); 338 } 339 340 for (i = table->DPs; i < MAX_DPs; i++) { 341 if (table->tab[i]) { 342 pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", 343 i); 344 gred_destroy_vq(table->tab[i]); 345 table->tab[i] = NULL; 346 } 347 } 348 349 return 0; 350 } 351 352 static inline int gred_change_vq(struct Qdisc *sch, int dp, 353 struct tc_gred_qopt *ctl, int prio, 354 u8 *stab, u32 max_P, 355 struct gred_sched_data **prealloc) 356 { 357 struct gred_sched *table = qdisc_priv(sch); 358 struct gred_sched_data *q = table->tab[dp]; 359 360 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) 361 return -EINVAL; 362 363 if (!q) { 364 table->tab[dp] = q = *prealloc; 365 *prealloc = NULL; 366 if (!q) 367 return -ENOMEM; 368 } 369 370 q->DP = dp; 371 q->prio = prio; 372 if (ctl->limit > sch->limit) 373 q->limit = sch->limit; 374 else 375 q->limit = ctl->limit; 376 377 if (q->backlog == 0) 378 red_end_of_idle_period(&q->vars); 379 380 red_set_parms(&q->parms, 381 ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, 382 ctl->Scell_log, stab, max_P); 383 red_set_vars(&q->vars); 384 return 0; 385 } 386 387 static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { 388 [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) }, 389 [TCA_GRED_STAB] = { .len = 256 }, 390 [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, 391 [TCA_GRED_MAX_P] = { .type = NLA_U32 }, 392 [TCA_GRED_LIMIT] = { .type = NLA_U32 }, 393 }; 394 395 static int gred_change(struct Qdisc *sch, struct nlattr *opt, 396 struct netlink_ext_ack *extack) 397 { 398 struct gred_sched *table = qdisc_priv(sch); 399 struct tc_gred_qopt *ctl; 400 struct nlattr *tb[TCA_GRED_MAX + 1]; 401 int err, prio = GRED_DEF_PRIO; 402 u8 *stab; 403 u32 max_P; 404 struct gred_sched_data *prealloc; 405 406 if (opt == NULL) 407 return -EINVAL; 408 409 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); 410 if (err < 0) 411 return err; 412 413 if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { 414 if (tb[TCA_GRED_LIMIT] != NULL) 415 sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); 416 return gred_change_table_def(sch, opt); 417 } 418 419 if (tb[TCA_GRED_PARMS] == NULL || 420 tb[TCA_GRED_STAB] == NULL || 421 tb[TCA_GRED_LIMIT] != NULL) 422 return -EINVAL; 423 424 max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; 425 426 err = -EINVAL; 427 ctl = nla_data(tb[TCA_GRED_PARMS]); 428 stab = nla_data(tb[TCA_GRED_STAB]); 429 430 if (ctl->DP >= table->DPs) 431 goto errout; 432 433 if (gred_rio_mode(table)) { 434 if (ctl->prio == 0) { 435 int def_prio = GRED_DEF_PRIO; 436 437 if (table->tab[table->def]) 438 def_prio = table->tab[table->def]->prio; 439 440 printk(KERN_DEBUG "GRED: DP %u does not have a prio " 441 "setting default to %d\n", ctl->DP, def_prio); 442 443 prio = def_prio; 444 } else 445 prio = ctl->prio; 446 } 447 448 prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); 449 sch_tree_lock(sch); 450 451 err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc); 452 if (err < 0) 453 goto errout_locked; 454 455 if (gred_rio_mode(table)) { 456 gred_disable_wred_mode(table); 457 if (gred_wred_mode_check(sch)) 458 gred_enable_wred_mode(table); 459 } 460 461 err = 0; 462 463 errout_locked: 464 sch_tree_unlock(sch); 465 kfree(prealloc); 466 errout: 467 return err; 468 } 469 470 static int gred_init(struct Qdisc *sch, struct nlattr *opt, 471 struct netlink_ext_ack *extack) 472 { 473 struct nlattr *tb[TCA_GRED_MAX + 1]; 474 int err; 475 476 if (!opt) 477 return -EINVAL; 478 479 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); 480 if (err < 0) 481 return err; 482 483 if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) 484 return -EINVAL; 485 486 if (tb[TCA_GRED_LIMIT]) 487 sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); 488 else 489 sch->limit = qdisc_dev(sch)->tx_queue_len 490 * psched_mtu(qdisc_dev(sch)); 491 492 return gred_change_table_def(sch, tb[TCA_GRED_DPS]); 493 } 494 495 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 496 { 497 struct gred_sched *table = qdisc_priv(sch); 498 struct nlattr *parms, *opts = NULL; 499 int i; 500 u32 max_p[MAX_DPs]; 501 struct tc_gred_sopt sopt = { 502 .DPs = table->DPs, 503 .def_DP = table->def, 504 .grio = gred_rio_mode(table), 505 .flags = table->red_flags, 506 }; 507 508 opts = nla_nest_start(skb, TCA_OPTIONS); 509 if (opts == NULL) 510 goto nla_put_failure; 511 if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt)) 512 goto nla_put_failure; 513 514 for (i = 0; i < MAX_DPs; i++) { 515 struct gred_sched_data *q = table->tab[i]; 516 517 max_p[i] = q ? q->parms.max_P : 0; 518 } 519 if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) 520 goto nla_put_failure; 521 522 if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) 523 goto nla_put_failure; 524 525 parms = nla_nest_start(skb, TCA_GRED_PARMS); 526 if (parms == NULL) 527 goto nla_put_failure; 528 529 for (i = 0; i < MAX_DPs; i++) { 530 struct gred_sched_data *q = table->tab[i]; 531 struct tc_gred_qopt opt; 532 unsigned long qavg; 533 534 memset(&opt, 0, sizeof(opt)); 535 536 if (!q) { 537 /* hack -- fix at some point with proper message 538 This is how we indicate to tc that there is no VQ 539 at this DP */ 540 541 opt.DP = MAX_DPs + i; 542 goto append_opt; 543 } 544 545 opt.limit = q->limit; 546 opt.DP = q->DP; 547 opt.backlog = gred_backlog(table, q, sch); 548 opt.prio = q->prio; 549 opt.qth_min = q->parms.qth_min >> q->parms.Wlog; 550 opt.qth_max = q->parms.qth_max >> q->parms.Wlog; 551 opt.Wlog = q->parms.Wlog; 552 opt.Plog = q->parms.Plog; 553 opt.Scell_log = q->parms.Scell_log; 554 opt.other = q->stats.other; 555 opt.early = q->stats.prob_drop; 556 opt.forced = q->stats.forced_drop; 557 opt.pdrop = q->stats.pdrop; 558 opt.packets = q->packetsin; 559 opt.bytesin = q->bytesin; 560 561 if (gred_wred_mode(table)) 562 gred_load_wred_set(table, q); 563 564 qavg = red_calc_qavg(&q->parms, &q->vars, 565 q->vars.qavg >> q->parms.Wlog); 566 opt.qave = qavg >> q->parms.Wlog; 567 568 append_opt: 569 if (nla_append(skb, sizeof(opt), &opt) < 0) 570 goto nla_put_failure; 571 } 572 573 nla_nest_end(skb, parms); 574 575 return nla_nest_end(skb, opts); 576 577 nla_put_failure: 578 nla_nest_cancel(skb, opts); 579 return -EMSGSIZE; 580 } 581 582 static void gred_destroy(struct Qdisc *sch) 583 { 584 struct gred_sched *table = qdisc_priv(sch); 585 int i; 586 587 for (i = 0; i < table->DPs; i++) { 588 if (table->tab[i]) 589 gred_destroy_vq(table->tab[i]); 590 } 591 } 592 593 static struct Qdisc_ops gred_qdisc_ops __read_mostly = { 594 .id = "gred", 595 .priv_size = sizeof(struct gred_sched), 596 .enqueue = gred_enqueue, 597 .dequeue = gred_dequeue, 598 .peek = qdisc_peek_head, 599 .init = gred_init, 600 .reset = gred_reset, 601 .destroy = gred_destroy, 602 .change = gred_change, 603 .dump = gred_dump, 604 .owner = THIS_MODULE, 605 }; 606 607 static int __init gred_module_init(void) 608 { 609 return register_qdisc(&gred_qdisc_ops); 610 } 611 612 static void __exit gred_module_exit(void) 613 { 614 unregister_qdisc(&gred_qdisc_ops); 615 } 616 617 module_init(gred_module_init) 618 module_exit(gred_module_exit) 619 620 MODULE_LICENSE("GPL"); 621