1 /* 2 * net/sched/sch_prio.c Simple 3-band priority "scheduler". 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * Fixes: 19990609: J Hadi Salim <hadi@nortelnetworks.com>: 11 * Init -- EINVAL when opt undefined 12 */ 13 14 #include <linux/module.h> 15 #include <linux/slab.h> 16 #include <linux/types.h> 17 #include <linux/kernel.h> 18 #include <linux/string.h> 19 #include <linux/errno.h> 20 #include <linux/skbuff.h> 21 #include <net/netlink.h> 22 #include <net/pkt_sched.h> 23 #include <net/pkt_cls.h> 24 25 struct prio_sched_data { 26 int bands; 27 struct tcf_proto __rcu *filter_list; 28 struct tcf_block *block; 29 u8 prio2band[TC_PRIO_MAX+1]; 30 struct Qdisc *queues[TCQ_PRIO_BANDS]; 31 }; 32 33 34 static struct Qdisc * 35 prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) 36 { 37 struct prio_sched_data *q = qdisc_priv(sch); 38 u32 band = skb->priority; 39 struct tcf_result res; 40 struct tcf_proto *fl; 41 int err; 42 43 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 44 if (TC_H_MAJ(skb->priority) != sch->handle) { 45 fl = rcu_dereference_bh(q->filter_list); 46 err = tcf_classify(skb, fl, &res, false); 47 #ifdef CONFIG_NET_CLS_ACT 48 switch (err) { 49 case TC_ACT_STOLEN: 50 case TC_ACT_QUEUED: 51 case TC_ACT_TRAP: 52 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 53 case TC_ACT_SHOT: 54 return NULL; 55 } 56 #endif 57 if (!fl || err < 0) { 58 if (TC_H_MAJ(band)) 59 band = 0; 60 return q->queues[q->prio2band[band & TC_PRIO_MAX]]; 61 } 62 band = res.classid; 63 } 64 band = TC_H_MIN(band) - 1; 65 if (band >= q->bands) 66 return q->queues[q->prio2band[0]]; 67 68 return q->queues[band]; 69 } 70 71 static int 72 prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) 73 { 74 struct Qdisc *qdisc; 75 int ret; 76 77 qdisc = prio_classify(skb, sch, &ret); 78 #ifdef CONFIG_NET_CLS_ACT 79 if (qdisc == NULL) { 80 81 if (ret & __NET_XMIT_BYPASS) 82 qdisc_qstats_drop(sch); 83 __qdisc_drop(skb, to_free); 84 return ret; 85 } 86 #endif 87 88 ret = qdisc_enqueue(skb, qdisc, to_free); 89 if (ret == NET_XMIT_SUCCESS) { 90 qdisc_qstats_backlog_inc(sch, skb); 91 sch->q.qlen++; 92 return NET_XMIT_SUCCESS; 93 } 94 if (net_xmit_drop_count(ret)) 95 qdisc_qstats_drop(sch); 96 return ret; 97 } 98 99 static struct sk_buff *prio_peek(struct Qdisc *sch) 100 { 101 struct prio_sched_data *q = qdisc_priv(sch); 102 int prio; 103 104 for (prio = 0; prio < q->bands; prio++) { 105 struct Qdisc *qdisc = q->queues[prio]; 106 struct sk_buff *skb = qdisc->ops->peek(qdisc); 107 if (skb) 108 return skb; 109 } 110 return NULL; 111 } 112 113 static struct sk_buff *prio_dequeue(struct Qdisc *sch) 114 { 115 struct prio_sched_data *q = qdisc_priv(sch); 116 int prio; 117 118 for (prio = 0; prio < q->bands; prio++) { 119 struct Qdisc *qdisc = q->queues[prio]; 120 struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); 121 if (skb) { 122 qdisc_bstats_update(sch, skb); 123 qdisc_qstats_backlog_dec(sch, skb); 124 sch->q.qlen--; 125 return skb; 126 } 127 } 128 return NULL; 129 130 } 131 132 static void 133 prio_reset(struct Qdisc *sch) 134 { 135 int prio; 136 struct prio_sched_data *q = qdisc_priv(sch); 137 138 for (prio = 0; prio < q->bands; prio++) 139 qdisc_reset(q->queues[prio]); 140 sch->qstats.backlog = 0; 141 sch->q.qlen = 0; 142 } 143 144 static void 145 prio_destroy(struct Qdisc *sch) 146 { 147 int prio; 148 struct prio_sched_data *q = qdisc_priv(sch); 149 150 tcf_block_put(q->block); 151 for (prio = 0; prio < q->bands; prio++) 152 qdisc_destroy(q->queues[prio]); 153 } 154 155 static int prio_tune(struct Qdisc *sch, struct nlattr *opt) 156 { 157 struct prio_sched_data *q = qdisc_priv(sch); 158 struct Qdisc *queues[TCQ_PRIO_BANDS]; 159 int oldbands = q->bands, i; 160 struct tc_prio_qopt *qopt; 161 162 if (nla_len(opt) < sizeof(*qopt)) 163 return -EINVAL; 164 qopt = nla_data(opt); 165 166 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) 167 return -EINVAL; 168 169 for (i = 0; i <= TC_PRIO_MAX; i++) { 170 if (qopt->priomap[i] >= qopt->bands) 171 return -EINVAL; 172 } 173 174 /* Before commit, make sure we can allocate all new qdiscs */ 175 for (i = oldbands; i < qopt->bands; i++) { 176 queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 177 TC_H_MAKE(sch->handle, i + 1)); 178 if (!queues[i]) { 179 while (i > oldbands) 180 qdisc_destroy(queues[--i]); 181 return -ENOMEM; 182 } 183 } 184 185 sch_tree_lock(sch); 186 q->bands = qopt->bands; 187 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); 188 189 for (i = q->bands; i < oldbands; i++) { 190 struct Qdisc *child = q->queues[i]; 191 192 qdisc_tree_reduce_backlog(child, child->q.qlen, 193 child->qstats.backlog); 194 qdisc_destroy(child); 195 } 196 197 for (i = oldbands; i < q->bands; i++) { 198 q->queues[i] = queues[i]; 199 if (q->queues[i] != &noop_qdisc) 200 qdisc_hash_add(q->queues[i], true); 201 } 202 203 sch_tree_unlock(sch); 204 return 0; 205 } 206 207 static int prio_init(struct Qdisc *sch, struct nlattr *opt) 208 { 209 struct prio_sched_data *q = qdisc_priv(sch); 210 int err; 211 212 if (!opt) 213 return -EINVAL; 214 215 err = tcf_block_get(&q->block, &q->filter_list); 216 if (err) 217 return err; 218 219 return prio_tune(sch, opt); 220 } 221 222 static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) 223 { 224 struct prio_sched_data *q = qdisc_priv(sch); 225 unsigned char *b = skb_tail_pointer(skb); 226 struct tc_prio_qopt opt; 227 228 opt.bands = q->bands; 229 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); 230 231 if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) 232 goto nla_put_failure; 233 234 return skb->len; 235 236 nla_put_failure: 237 nlmsg_trim(skb, b); 238 return -1; 239 } 240 241 static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 242 struct Qdisc **old) 243 { 244 struct prio_sched_data *q = qdisc_priv(sch); 245 unsigned long band = arg - 1; 246 247 if (new == NULL) 248 new = &noop_qdisc; 249 250 *old = qdisc_replace(sch, new, &q->queues[band]); 251 return 0; 252 } 253 254 static struct Qdisc * 255 prio_leaf(struct Qdisc *sch, unsigned long arg) 256 { 257 struct prio_sched_data *q = qdisc_priv(sch); 258 unsigned long band = arg - 1; 259 260 return q->queues[band]; 261 } 262 263 static unsigned long prio_find(struct Qdisc *sch, u32 classid) 264 { 265 struct prio_sched_data *q = qdisc_priv(sch); 266 unsigned long band = TC_H_MIN(classid); 267 268 if (band - 1 >= q->bands) 269 return 0; 270 return band; 271 } 272 273 static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid) 274 { 275 return prio_find(sch, classid); 276 } 277 278 279 static void prio_unbind(struct Qdisc *q, unsigned long cl) 280 { 281 } 282 283 static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, 284 struct tcmsg *tcm) 285 { 286 struct prio_sched_data *q = qdisc_priv(sch); 287 288 tcm->tcm_handle |= TC_H_MIN(cl); 289 tcm->tcm_info = q->queues[cl-1]->handle; 290 return 0; 291 } 292 293 static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, 294 struct gnet_dump *d) 295 { 296 struct prio_sched_data *q = qdisc_priv(sch); 297 struct Qdisc *cl_q; 298 299 cl_q = q->queues[cl - 1]; 300 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), 301 d, NULL, &cl_q->bstats) < 0 || 302 gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) 303 return -1; 304 305 return 0; 306 } 307 308 static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) 309 { 310 struct prio_sched_data *q = qdisc_priv(sch); 311 int prio; 312 313 if (arg->stop) 314 return; 315 316 for (prio = 0; prio < q->bands; prio++) { 317 if (arg->count < arg->skip) { 318 arg->count++; 319 continue; 320 } 321 if (arg->fn(sch, prio + 1, arg) < 0) { 322 arg->stop = 1; 323 break; 324 } 325 arg->count++; 326 } 327 } 328 329 static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl) 330 { 331 struct prio_sched_data *q = qdisc_priv(sch); 332 333 if (cl) 334 return NULL; 335 return q->block; 336 } 337 338 static const struct Qdisc_class_ops prio_class_ops = { 339 .graft = prio_graft, 340 .leaf = prio_leaf, 341 .find = prio_find, 342 .walk = prio_walk, 343 .tcf_block = prio_tcf_block, 344 .bind_tcf = prio_bind, 345 .unbind_tcf = prio_unbind, 346 .dump = prio_dump_class, 347 .dump_stats = prio_dump_class_stats, 348 }; 349 350 static struct Qdisc_ops prio_qdisc_ops __read_mostly = { 351 .next = NULL, 352 .cl_ops = &prio_class_ops, 353 .id = "prio", 354 .priv_size = sizeof(struct prio_sched_data), 355 .enqueue = prio_enqueue, 356 .dequeue = prio_dequeue, 357 .peek = prio_peek, 358 .init = prio_init, 359 .reset = prio_reset, 360 .destroy = prio_destroy, 361 .change = prio_tune, 362 .dump = prio_dump, 363 .owner = THIS_MODULE, 364 }; 365 366 static int __init prio_module_init(void) 367 { 368 return register_qdisc(&prio_qdisc_ops); 369 } 370 371 static void __exit prio_module_exit(void) 372 { 373 unregister_qdisc(&prio_qdisc_ops); 374 } 375 376 module_init(prio_module_init) 377 module_exit(prio_module_exit) 378 379 MODULE_LICENSE("GPL"); 380