/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/netlink.h>
#include <linux/err.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* The list of all installed classifier types */

static struct tcf_proto_ops *tcf_proto_base __read_mostly;

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

static struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
{
	struct tcf_proto_ops *t = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		for (t = tcf_proto_base; t; t = t->next) {
			if (nla_strcmp(kind, t->kind) == 0) {
				if (!try_module_get(t->owner))
					t = NULL;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return t;
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t, **tp;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	ops->next = NULL;
	*tp = ops;
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t, **tp;
	int rc = -ENOENT;

	write_lock(&cls_mod_lock);
	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
		if (t == ops)
			break;

	if (!t)
		goto out;
	*tp = t->next;
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			  struct tcf_proto *tp, unsigned long fh, int event);


/* Select new prio value from the range, managed by kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return first;
}

/* Add/change/delete/get a filter node */

static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = skb->sk->sk_net;
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 nprio;
	u32 parent;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_proto **back, **chain;
	struct tcf_proto *tp;
	struct tcf_proto_ops *tp_ops;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	unsigned long fh;
	int err;

	if (net != &init_net)
		return -EINVAL;

replay:
	t = NLMSG_DATA(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	nprio = prio;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
		/* If no priority is given, the user wants us to allocate it. */
		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE))
			return -ENOENT;
		prio = TC_H_MAKE(0x80000000U, 0U);
	}

	/* Find head of filter chain. */

	/* Find link */
	dev = __dev_get_by_index(&init_net, t->tcm_ifindex);
	if (dev == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/* Find qdisc */
	if (!parent) {
		q = dev->qdisc_sleeping;
		parent = q->handle;
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}

	/* Is it classful? */
	if ((cops = q->ops->cl_ops) == NULL)
		return -EINVAL;

	/* Do we search for a filter attached to a class? */
	if (TC_H_MIN(parent)) {
		cl = cops->get(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
	chain = cops->tcf_chain(q, cl);
	err = -EINVAL;
	if (chain == NULL)
		goto errout;

	/* Check the chain for existence of proto-tcf with this priority */
	for (back = chain; (tp = *back) != NULL; back = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (!nprio || (tp->protocol != protocol && protocol))
					goto errout;
			} else
				tp = NULL;
			break;
		}
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol)
			goto errout;

		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE))
			goto errout;


		/* Create new proto tcf */

		err = -ENOBUFS;
		tp = kzalloc(sizeof(*tp), GFP_KERNEL);
		if (tp == NULL)
			goto errout;
		err = -EINVAL;
		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
		if (tp_ops == NULL) {
#ifdef CONFIG_KMOD
			struct nlattr *kind = tca[TCA_KIND];
			char name[IFNAMSIZ];

			if (kind != NULL &&
			    nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
				rtnl_unlock();
				request_module("cls_%s", name);
				rtnl_lock();
				tp_ops = tcf_proto_lookup_ops(kind);
				/* We dropped the RTNL semaphore in order to
				 * perform the module load.  So, even if we
				 * succeeded in loading the module we have to
				 * replay the request.  We indicate this using
				 * -EAGAIN.
				 */
				if (tp_ops != NULL) {
					module_put(tp_ops->owner);
					err = -EAGAIN;
				}
			}
#endif
			kfree(tp);
			goto errout;
		}
		tp->ops = tp_ops;
		tp->protocol = protocol;
		tp->prio = nprio ? : tcf_auto_prio(*back);
		tp->q = q;
		tp->classify = tp_ops->classify;
		tp->classid = parent;

		err = tp_ops->init(tp);
		if (err != 0) {
			module_put(tp_ops->owner);
			kfree(tp);
			goto errout;
		}

		qdisc_lock_tree(dev);
		tp->next = *back;
		*back = tp;
		qdisc_unlock_tree(dev);

	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
		goto errout;

	fh = tp->ops->get(tp, t->tcm_handle);

	if (fh == 0) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
			qdisc_lock_tree(dev);
			*back = tp->next;
			qdisc_unlock_tree(dev);

			tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
			tcf_destroy(tp);
			err = 0;
			goto errout;
		}

		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto errout;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTFILTER:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto errout;
			break;
		case RTM_DELTFILTER:
			err = tp->ops->delete(tp, fh);
			if (err == 0)
				tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
			goto errout;
		case RTM_GETTFILTER:
			err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

	err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
	if (err == 0)
		tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);

errout:
	if (cl)
		cops->put(q, cl);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}

static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
			 unsigned long fh, u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = tp->q->dev->ifindex;
	tcm->tcm_parent = tp->classid;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
	tcm->tcm_handle = fh;
	if (RTM_DELTFILTER != event) {
		tcm->tcm_handle = 0;
		if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			  struct tcf_proto *tp, unsigned long fh, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
			 struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;

	return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
}

static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = skb->sk->sk_net;
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_proto *tp, **chain;
	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
	unsigned long cl = 0;
	const struct Qdisc_class_ops *cops;
	struct tcf_dump_args arg;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return skb->len;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return skb->len;

	if (!tcm->tcm_parent)
		q = dev->qdisc_sleeping;
	else
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	if (!q)
		goto out;
	if ((cops = q->ops->cl_ops) == NULL)
		goto errout;
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->get(q, tcm->tcm_parent);
		if (cl == 0)
			goto errout;
	}
	chain = cops->tcf_chain(q, cl);
	if (chain == NULL)
		goto errout;

	s_t = cb->args[0];

	for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
		if (t < s_t)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
				break;

			cb->args[1] = 1;
		}
		if (tp->ops->walk == NULL)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			break;
	}

	cb->args[0] = t;

errout:
	if (cl)
		cops->put(q, cl);
out:
	dev_put(dev);
	return skb->len;
}

void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	if (exts->action) {
		tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
		exts->action = NULL;
	}
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts,
		      const struct tcf_ext_map *map)
{
	memset(exts, 0, sizeof(*exts));

#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (map->police && tb[map->police]) {
			act = tcf_action_init_1(tb[map->police], rate_tlv,
						"police", TCA_ACT_NOREPLACE,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = TCA_OLD_COMPAT;
			exts->action = act;
		} else if (map->action && tb[map->action]) {
			act = tcf_action_init(tb[map->action], rate_tlv, NULL,
					      TCA_ACT_NOREPLACE, TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			exts->action = act;
		}
	}
#else
	if ((map->action && tb[map->action]) ||
	    (map->police && tb[map->police]))
		return -EOPNOTSUPP;
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
		     struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	if (src->action) {
		struct tc_action *act;
		tcf_tree_lock(tp);
		act = xchg(&dst->action, src->action);
		tcf_tree_unlock(tp);
		if (act)
			tcf_action_destroy(act, TCA_ACT_UNBIND);
	}
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
		  const struct tcf_ext_map *map)
{
#ifdef CONFIG_NET_CLS_ACT
	if (map->action && exts->action) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		struct nlattr *nest;

		if (exts->action->type != TCA_OLD_COMPAT) {
			nest = nla_nest_start(skb, map->action);
			if (nest == NULL)
				goto nla_put_failure;
			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (map->police) {
			nest = nla_nest_start(skb, map->police);
			if (nest == NULL)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
#endif
	return 0;
nla_put_failure: __attribute__ ((unused))
	return -1;
}
EXPORT_SYMBOL(tcf_exts_dump);


int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
			const struct tcf_ext_map *map)
{
#ifdef CONFIG_NET_CLS_ACT
	if (exts->action)
		if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
			goto nla_put_failure;
#endif
	return 0;
nla_put_failure: __attribute__ ((unused))
	return -1;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);

static int __init tc_filter_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter);

	return 0;
}

subsys_initcall(tc_filter_init);