1 /* 2 * net/sched/sch_red.c Random Early Detection queue. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * 11 * Changes: 12 * J Hadi Salim 980914: computation fixes 13 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. 14 * J Hadi Salim 980816: ECN support 15 */ 16 17 #include <linux/module.h> 18 #include <linux/types.h> 19 #include <linux/kernel.h> 20 #include <linux/skbuff.h> 21 #include <net/pkt_sched.h> 22 #include <net/pkt_cls.h> 23 #include <net/inet_ecn.h> 24 #include <net/red.h> 25 26 27 /* Parameters, settable by user: 28 ----------------------------- 29 30 limit - bytes (must be > qth_max + burst) 31 32 Hard limit on queue length, should be chosen >qth_max 33 to allow packet bursts. This parameter does not 34 affect the algorithms behaviour and can be chosen 35 arbitrarily high (well, less than ram size) 36 Really, this limit will never be reached 37 if RED works correctly. 38 */ 39 40 struct red_sched_data { 41 u32 limit; /* HARD maximal queue length */ 42 unsigned char flags; 43 struct timer_list adapt_timer; 44 struct Qdisc *sch; 45 struct red_parms parms; 46 struct red_vars vars; 47 struct red_stats stats; 48 struct Qdisc *qdisc; 49 }; 50 51 static inline int red_use_ecn(struct red_sched_data *q) 52 { 53 return q->flags & TC_RED_ECN; 54 } 55 56 static inline int red_use_harddrop(struct red_sched_data *q) 57 { 58 return q->flags & TC_RED_HARDDROP; 59 } 60 61 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, 62 struct sk_buff **to_free) 63 { 64 struct red_sched_data *q = qdisc_priv(sch); 65 struct Qdisc *child = q->qdisc; 66 int ret; 67 68 q->vars.qavg = red_calc_qavg(&q->parms, 69 &q->vars, 70 child->qstats.backlog); 71 72 if (red_is_idling(&q->vars)) 73 red_end_of_idle_period(&q->vars); 74 75 switch (red_action(&q->parms, &q->vars, q->vars.qavg)) { 76 case RED_DONT_MARK: 77 break; 78 79 case RED_PROB_MARK: 80 qdisc_qstats_overlimit(sch); 81 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { 82 q->stats.prob_drop++; 83 goto congestion_drop; 84 } 85 86 q->stats.prob_mark++; 87 break; 88 89 case RED_HARD_MARK: 90 qdisc_qstats_overlimit(sch); 91 if (red_use_harddrop(q) || !red_use_ecn(q) || 92 !INET_ECN_set_ce(skb)) { 93 q->stats.forced_drop++; 94 goto congestion_drop; 95 } 96 97 q->stats.forced_mark++; 98 break; 99 } 100 101 ret = qdisc_enqueue(skb, child, to_free); 102 if (likely(ret == NET_XMIT_SUCCESS)) { 103 qdisc_qstats_backlog_inc(sch, skb); 104 sch->q.qlen++; 105 } else if (net_xmit_drop_count(ret)) { 106 q->stats.pdrop++; 107 qdisc_qstats_drop(sch); 108 } 109 return ret; 110 111 congestion_drop: 112 qdisc_drop(skb, sch, to_free); 113 return NET_XMIT_CN; 114 } 115 116 static struct sk_buff *red_dequeue(struct Qdisc *sch) 117 { 118 struct sk_buff *skb; 119 struct red_sched_data *q = qdisc_priv(sch); 120 struct Qdisc *child = q->qdisc; 121 122 skb = child->dequeue(child); 123 if (skb) { 124 qdisc_bstats_update(sch, skb); 125 qdisc_qstats_backlog_dec(sch, skb); 126 sch->q.qlen--; 127 } else { 128 if (!red_is_idling(&q->vars)) 129 red_start_of_idle_period(&q->vars); 130 } 131 return skb; 132 } 133 134 static struct sk_buff *red_peek(struct Qdisc *sch) 135 { 136 struct red_sched_data *q = qdisc_priv(sch); 137 struct Qdisc *child = q->qdisc; 138 139 return child->ops->peek(child); 140 } 141 142 static void red_reset(struct Qdisc *sch) 143 { 144 struct red_sched_data *q = qdisc_priv(sch); 145 146 qdisc_reset(q->qdisc); 147 sch->qstats.backlog = 0; 148 sch->q.qlen = 0; 149 red_restart(&q->vars); 150 } 151 152 static int red_offload(struct Qdisc *sch, bool enable) 153 { 154 struct red_sched_data *q = qdisc_priv(sch); 155 struct net_device *dev = qdisc_dev(sch); 156 struct tc_red_qopt_offload opt = { 157 .handle = sch->handle, 158 .parent = sch->parent, 159 }; 160 int err; 161 162 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 163 return -EOPNOTSUPP; 164 165 if (enable) { 166 opt.command = TC_RED_REPLACE; 167 opt.set.min = q->parms.qth_min >> q->parms.Wlog; 168 opt.set.max = q->parms.qth_max >> q->parms.Wlog; 169 opt.set.probability = q->parms.max_P; 170 opt.set.is_ecn = red_use_ecn(q); 171 } else { 172 opt.command = TC_RED_DESTROY; 173 } 174 175 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); 176 177 if (!err && enable) 178 sch->flags |= TCQ_F_OFFLOADED; 179 else 180 sch->flags &= ~TCQ_F_OFFLOADED; 181 182 return err; 183 } 184 185 static void red_destroy(struct Qdisc *sch) 186 { 187 struct red_sched_data *q = qdisc_priv(sch); 188 189 del_timer_sync(&q->adapt_timer); 190 red_offload(sch, false); 191 qdisc_destroy(q->qdisc); 192 } 193 194 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { 195 [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, 196 [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, 197 [TCA_RED_MAX_P] = { .type = NLA_U32 }, 198 }; 199 200 static int red_change(struct Qdisc *sch, struct nlattr *opt) 201 { 202 struct red_sched_data *q = qdisc_priv(sch); 203 struct nlattr *tb[TCA_RED_MAX + 1]; 204 struct tc_red_qopt *ctl; 205 struct Qdisc *child = NULL; 206 int err; 207 u32 max_P; 208 209 if (opt == NULL) 210 return -EINVAL; 211 212 err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL); 213 if (err < 0) 214 return err; 215 216 if (tb[TCA_RED_PARMS] == NULL || 217 tb[TCA_RED_STAB] == NULL) 218 return -EINVAL; 219 220 max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; 221 222 ctl = nla_data(tb[TCA_RED_PARMS]); 223 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) 224 return -EINVAL; 225 226 if (ctl->limit > 0) { 227 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); 228 if (IS_ERR(child)) 229 return PTR_ERR(child); 230 } 231 232 if (child != &noop_qdisc) 233 qdisc_hash_add(child, true); 234 sch_tree_lock(sch); 235 q->flags = ctl->flags; 236 q->limit = ctl->limit; 237 if (child) { 238 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, 239 q->qdisc->qstats.backlog); 240 qdisc_destroy(q->qdisc); 241 q->qdisc = child; 242 } 243 244 red_set_parms(&q->parms, 245 ctl->qth_min, ctl->qth_max, ctl->Wlog, 246 ctl->Plog, ctl->Scell_log, 247 nla_data(tb[TCA_RED_STAB]), 248 max_P); 249 red_set_vars(&q->vars); 250 251 del_timer(&q->adapt_timer); 252 if (ctl->flags & TC_RED_ADAPTATIVE) 253 mod_timer(&q->adapt_timer, jiffies + HZ/2); 254 255 if (!q->qdisc->q.qlen) 256 red_start_of_idle_period(&q->vars); 257 258 sch_tree_unlock(sch); 259 red_offload(sch, true); 260 return 0; 261 } 262 263 static inline void red_adaptative_timer(struct timer_list *t) 264 { 265 struct red_sched_data *q = from_timer(q, t, adapt_timer); 266 struct Qdisc *sch = q->sch; 267 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 268 269 spin_lock(root_lock); 270 red_adaptative_algo(&q->parms, &q->vars); 271 mod_timer(&q->adapt_timer, jiffies + HZ/2); 272 spin_unlock(root_lock); 273 } 274 275 static int red_init(struct Qdisc *sch, struct nlattr *opt) 276 { 277 struct red_sched_data *q = qdisc_priv(sch); 278 279 q->qdisc = &noop_qdisc; 280 q->sch = sch; 281 timer_setup(&q->adapt_timer, red_adaptative_timer, 0); 282 return red_change(sch, opt); 283 } 284 285 static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) 286 { 287 struct net_device *dev = qdisc_dev(sch); 288 struct tc_red_qopt_offload hw_stats = { 289 .command = TC_RED_STATS, 290 .handle = sch->handle, 291 .parent = sch->parent, 292 { 293 .stats.bstats = &sch->bstats, 294 .stats.qstats = &sch->qstats, 295 }, 296 }; 297 298 if (!(sch->flags & TCQ_F_OFFLOADED)) 299 return 0; 300 301 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, 302 &hw_stats); 303 } 304 305 static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 306 { 307 struct red_sched_data *q = qdisc_priv(sch); 308 struct nlattr *opts = NULL; 309 struct tc_red_qopt opt = { 310 .limit = q->limit, 311 .flags = q->flags, 312 .qth_min = q->parms.qth_min >> q->parms.Wlog, 313 .qth_max = q->parms.qth_max >> q->parms.Wlog, 314 .Wlog = q->parms.Wlog, 315 .Plog = q->parms.Plog, 316 .Scell_log = q->parms.Scell_log, 317 }; 318 int err; 319 320 sch->qstats.backlog = q->qdisc->qstats.backlog; 321 err = red_dump_offload_stats(sch, &opt); 322 if (err) 323 goto nla_put_failure; 324 325 opts = nla_nest_start(skb, TCA_OPTIONS); 326 if (opts == NULL) 327 goto nla_put_failure; 328 if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || 329 nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P)) 330 goto nla_put_failure; 331 return nla_nest_end(skb, opts); 332 333 nla_put_failure: 334 nla_nest_cancel(skb, opts); 335 return -EMSGSIZE; 336 } 337 338 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 339 { 340 struct red_sched_data *q = qdisc_priv(sch); 341 struct net_device *dev = qdisc_dev(sch); 342 struct tc_red_xstats st = { 343 .early = q->stats.prob_drop + q->stats.forced_drop, 344 .pdrop = q->stats.pdrop, 345 .other = q->stats.other, 346 .marked = q->stats.prob_mark + q->stats.forced_mark, 347 }; 348 349 if (sch->flags & TCQ_F_OFFLOADED) { 350 struct red_stats hw_stats = {0}; 351 struct tc_red_qopt_offload hw_stats_request = { 352 .command = TC_RED_XSTATS, 353 .handle = sch->handle, 354 .parent = sch->parent, 355 { 356 .xstats = &hw_stats, 357 }, 358 }; 359 if (!dev->netdev_ops->ndo_setup_tc(dev, 360 TC_SETUP_QDISC_RED, 361 &hw_stats_request)) { 362 st.early += hw_stats.prob_drop + hw_stats.forced_drop; 363 st.pdrop += hw_stats.pdrop; 364 st.other += hw_stats.other; 365 st.marked += hw_stats.prob_mark + hw_stats.forced_mark; 366 } 367 } 368 369 return gnet_stats_copy_app(d, &st, sizeof(st)); 370 } 371 372 static int red_dump_class(struct Qdisc *sch, unsigned long cl, 373 struct sk_buff *skb, struct tcmsg *tcm) 374 { 375 struct red_sched_data *q = qdisc_priv(sch); 376 377 tcm->tcm_handle |= TC_H_MIN(1); 378 tcm->tcm_info = q->qdisc->handle; 379 return 0; 380 } 381 382 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 383 struct Qdisc **old) 384 { 385 struct red_sched_data *q = qdisc_priv(sch); 386 387 if (new == NULL) 388 new = &noop_qdisc; 389 390 *old = qdisc_replace(sch, new, &q->qdisc); 391 return 0; 392 } 393 394 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg) 395 { 396 struct red_sched_data *q = qdisc_priv(sch); 397 return q->qdisc; 398 } 399 400 static unsigned long red_find(struct Qdisc *sch, u32 classid) 401 { 402 return 1; 403 } 404 405 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) 406 { 407 if (!walker->stop) { 408 if (walker->count >= walker->skip) 409 if (walker->fn(sch, 1, walker) < 0) { 410 walker->stop = 1; 411 return; 412 } 413 walker->count++; 414 } 415 } 416 417 static const struct Qdisc_class_ops red_class_ops = { 418 .graft = red_graft, 419 .leaf = red_leaf, 420 .find = red_find, 421 .walk = red_walk, 422 .dump = red_dump_class, 423 }; 424 425 static struct Qdisc_ops red_qdisc_ops __read_mostly = { 426 .id = "red", 427 .priv_size = sizeof(struct red_sched_data), 428 .cl_ops = &red_class_ops, 429 .enqueue = red_enqueue, 430 .dequeue = red_dequeue, 431 .peek = red_peek, 432 .init = red_init, 433 .reset = red_reset, 434 .destroy = red_destroy, 435 .change = red_change, 436 .dump = red_dump, 437 .dump_stats = red_dump_stats, 438 .owner = THIS_MODULE, 439 }; 440 441 static int __init red_module_init(void) 442 { 443 return register_qdisc(&red_qdisc_ops); 444 } 445 446 static void __exit red_module_exit(void) 447 { 448 unregister_qdisc(&red_qdisc_ops); 449 } 450 451 module_init(red_module_init) 452 module_exit(red_module_exit) 453 454 MODULE_LICENSE("GPL"); 455