// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */
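/*
 * A minimal configuration sketch (not part of this file): the interface
 * name and every number below are arbitrary examples, shown only to
 * illustrate the comment above, i.e. limit is set well above max so that
 * bursts can be absorbed before the hard limit is hit.
 *
 *	tc qdisc add dev eth0 root red \
 *		limit 400000 min 30000 max 90000 avpkt 1000 \
 *		burst 55 probability 0.02 bandwidth 10Mbit ecn
 */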
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

#define TC_RED_SUPPORTED_FLAGS	(TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}
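/*
 * Rough sketch of the decision made by red_calc_qavg()/red_action() above.
 * This is an assumption-level summary of the fixed-point implementation in
 * include/net/red.h, not a definition of it:
 *
 *	W    = 2^-Wlog				(EWMA weight)
 *	qavg = (1 - W) * qavg + W * backlog	(averaged backlog, bytes)
 *
 *	qavg <  qth_min			-> RED_DONT_MARK
 *	qth_min <= qavg < qth_max	-> RED_PROB_MARK, probability grows
 *					   roughly linearly up to max_P
 *	qavg >= qth_max			-> RED_HARD_MARK
 *
 * The thresholds are kept pre-scaled by Wlog, which is why the dump and
 * offload paths below shift qth_min/qth_max right by Wlog before reporting.
 */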
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
};

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}
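/*
 * Adaptive RED, a sketch of the assumed behaviour (the actual algorithm is
 * red_adaptative_algo() in include/net/red.h): when TC_RED_ADAPTATIVE is
 * set, red_change() arms adapt_timer and the callback below re-runs every
 * HZ/2 jiffies, adjusting parms->max_P so that the marking probability
 * tracks the measured average queue instead of relying on a hand-tuned
 * value.
 */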
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");