/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:	ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.
 */

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

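/* Enqueue: recompute the EWMA of the child backlog and let the RED core
 * pick an action.  In unscaled form the average is roughly
 *
 *	qavg = (1 - W) * qavg + W * backlog,	where W = 2^(-Wlog)
 *
 * (net/red.h keeps qavg in fixed point; this is only the intuition.)
 * Below qth_min packets pass untouched; between qth_min and qth_max they
 * are marked/dropped with a probability growing towards max_P; above
 * qth_max they are always marked/dropped.  With TC_RED_ECN set,
 * ECN-capable packets are CE-marked instead of dropped, except that
 * TC_RED_HARDDROP forces a real drop in the hard-mark case.
 */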
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P]	= { .type = NLA_U32 },
};

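/* Configuration path, shared by "tc qdisc add" (via red_init) and
 * "tc qdisc change".  TCA_RED_PARMS and TCA_RED_STAB are mandatory;
 * a non-zero ctl->limit (re)creates the bfifo child that holds the
 * actual packets.  An illustrative iproute2 invocation (device name
 * and numbers are placeholders only; tc derives Wlog, Plog and the
 * STAB lookup table from them before issuing the netlink request):
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 */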
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					    &hw_stats);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

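/* If the qdisc is offloaded, the software counters in q->stats are not
 * updated by the datapath, so TC_RED_XSTATS asks the driver to refresh
 * them from hardware before the totals are copied to user space.
 */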
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");