/*
 * Berkeley Packet Filter based traffic classifier
 *
 * Might be used to classify traffic through flexible, user-defined and
 * possibly JIT-ed BPF filters for traffic control as an alternative to
 * ematches.
 *
 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");

#define CLS_BPF_NAME_LEN	256

struct cls_bpf_head {
	struct list_head plist;
	u32 hgen;
	struct rcu_head rcu;
};

struct cls_bpf_prog {
	struct bpf_prog *filter;
	struct list_head link;
	struct tcf_result res;
	bool exts_integrated;
	struct tcf_exts exts;
	u32 handle;
	union {
		u32 bpf_fd;
		u16 bpf_num_ops;
	};
	struct sock_filter *bpf_ops;
	const char *bpf_name;
	struct tcf_proto *tp;
	struct rcu_head rcu;
};

static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
	[TCA_BPF_FLAGS]		= { .type = NLA_U32 },
	[TCA_BPF_FD]		= { .type = NLA_U32 },
	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
	[TCA_BPF_OPS]		= { .type = NLA_BINARY,
				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

static int cls_bpf_exec_opcode(int code)
{
	switch (code) {
	case TC_ACT_OK:
	case TC_ACT_RECLASSIFY:
	case TC_ACT_SHOT:
	case TC_ACT_PIPE:
	case TC_ACT_STOLEN:
	case TC_ACT_QUEUED:
	case TC_ACT_REDIRECT:
	case TC_ACT_UNSPEC:
		return code;
	default:
		return TC_ACT_UNSPEC;
	}
}

static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			    struct tcf_result *res)
{
	struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
	struct cls_bpf_prog *prog;
#ifdef CONFIG_NET_CLS_ACT
	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
#else
	bool at_ingress = false;
#endif
	int ret = -1;

	if (unlikely(!skb_mac_header_was_set(skb)))
		return -1;

	/* Needed here for accessing maps. */
	rcu_read_lock();
	list_for_each_entry_rcu(prog, &head->plist, link) {
		int filter_res;

		qdisc_skb_cb(skb)->tc_classid = prog->res.classid;

		if (at_ingress) {
			/* It is safe to push/pull even if skb_shared() */
			__skb_push(skb, skb->mac_len);
			filter_res = BPF_PROG_RUN(prog->filter, skb);
			__skb_pull(skb, skb->mac_len);
		} else {
			filter_res = BPF_PROG_RUN(prog->filter, skb);
		}

		if (prog->exts_integrated) {
			res->class = prog->res.class;
			res->classid = qdisc_skb_cb(skb)->tc_classid;

			ret = cls_bpf_exec_opcode(filter_res);
			if (ret == TC_ACT_UNSPEC)
				continue;
			break;
		}

		if (filter_res == 0)
			continue;

		*res = prog->res;
		if (filter_res != -1)
			res->classid = filter_res;

		ret = tcf_exts_exec(skb, &prog->exts, res);
		if (ret < 0)
			continue;

		break;
	}
	rcu_read_unlock();

	return ret;
}

static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
{
	return !prog->bpf_ops;
}

static int cls_bpf_init(struct tcf_proto *tp)
{
	struct cls_bpf_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;

	INIT_LIST_HEAD_RCU(&head->plist);
	rcu_assign_pointer(tp->root, head);

	return 0;
}

static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
	tcf_exts_destroy(&prog->exts);

	if (cls_bpf_is_ebpf(prog))
		bpf_prog_put(prog->filter);
	else
		bpf_prog_destroy(prog->filter);

	kfree(prog->bpf_name);
	kfree(prog->bpf_ops);
	kfree(prog);
}

static void __cls_bpf_delete_prog(struct rcu_head *rcu)
{
	struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);

	cls_bpf_delete_prog(prog->tp, prog);
}

static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;

	list_del_rcu(&prog->link);
	tcf_unbind_filter(tp, &prog->res);
	call_rcu(&prog->rcu, __cls_bpf_delete_prog);

	return 0;
}

static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
{
	struct cls_bpf_head *head = rtnl_dereference(tp->root);
	struct cls_bpf_prog *prog, *tmp;

	if (!force && !list_empty(&head->plist))
		return false;

	list_for_each_entry_safe(prog, tmp, &head->plist, link) {
		list_del_rcu(&prog->link);
		tcf_unbind_filter(tp, &prog->res);
		call_rcu(&prog->rcu, __cls_bpf_delete_prog);
	}

	RCU_INIT_POINTER(tp->root, NULL);
	kfree_rcu(head, rcu);
	return true;
}

static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
	struct cls_bpf_head *head = rtnl_dereference(tp->root);
	struct cls_bpf_prog *prog;
	unsigned long ret = 0UL;

	if (head == NULL)
		return 0UL;

	list_for_each_entry(prog, &head->plist, link) {
		if (prog->handle == handle) {
			ret = (unsigned long) prog;
			break;
		}
	}

	return ret;
}

static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
{
	struct sock_filter *bpf_ops;
	struct sock_fprog_kern fprog_tmp;
	struct bpf_prog *fp;
	u16 bpf_size, bpf_num_ops;
	int ret;

	bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
		return -EINVAL;

	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
	if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
		return -EINVAL;

	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
	if (bpf_ops == NULL)
		return -ENOMEM;

	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);

	fprog_tmp.len = bpf_num_ops;
	fprog_tmp.filter = bpf_ops;

	ret = bpf_prog_create(&fp, &fprog_tmp);
	if (ret < 0) {
		kfree(bpf_ops);
		return ret;
	}

	prog->bpf_ops = bpf_ops;
	prog->bpf_num_ops = bpf_num_ops;
	prog->bpf_name = NULL;
	prog->filter = fp;

	return 0;
}

static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
				 const struct tcf_proto *tp)
{
	struct bpf_prog *fp;
	char *name = NULL;
	u32 bpf_fd;

	bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);

	fp = bpf_prog_get(bpf_fd);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
		bpf_prog_put(fp);
		return -EINVAL;
	}

	if (tb[TCA_BPF_NAME]) {
		name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
			       nla_len(tb[TCA_BPF_NAME]),
			       GFP_KERNEL);
		if (!name) {
			bpf_prog_put(fp);
			return -ENOMEM;
		}
	}

	prog->bpf_ops = NULL;
	prog->bpf_fd = bpf_fd;
	prog->bpf_name = name;
	prog->filter = fp;

	return 0;
}

static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
				   struct cls_bpf_prog *prog,
				   unsigned long base, struct nlattr **tb,
				   struct nlattr *est, bool ovr)
{
	bool is_bpf, is_ebpf, have_exts = false;
	struct tcf_exts exts;
	u32 classid;
	int ret;

	is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
	is_ebpf = tb[TCA_BPF_FD];

	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
	    !tb[TCA_BPF_CLASSID])
		return -EINVAL;

	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
	if (ret < 0)
		return ret;

	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
	if (tb[TCA_BPF_FLAGS]) {
		u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);

		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
			tcf_exts_destroy(&exts);
			return -EINVAL;
		}

		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
	}

	prog->res.classid = classid;
	prog->exts_integrated = have_exts;

	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
		       cls_bpf_prog_from_efd(tb, prog, tp);
	if (ret < 0) {
		tcf_exts_destroy(&exts);
		return ret;
	}

	tcf_bind_filter(tp, &prog->res, base);
	tcf_exts_change(tp, &prog->exts, &exts);

	return 0;
}

static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
				   struct cls_bpf_head *head)
{
	unsigned int i = 0x80000000;
	u32 handle;

	do {
		if (++head->hgen == 0x7FFFFFFF)
			head->hgen = 1;
	} while (--i > 0 && cls_bpf_get(tp, head->hgen));

	if (unlikely(i == 0)) {
		pr_err("Insufficient number of handles\n");
		handle = 0;
	} else {
		handle = head->hgen;
	}

	return handle;
}

static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
			  struct tcf_proto *tp, unsigned long base,
			  u32 handle, struct nlattr **tca,
			  unsigned long *arg, bool ovr)
{
	struct cls_bpf_head *head = rtnl_dereference(tp->root);
	struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
	struct nlattr *tb[TCA_BPF_MAX + 1];
	struct cls_bpf_prog *prog;
	int ret;

	if (tca[TCA_OPTIONS] == NULL)
		return -EINVAL;

	ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
	if (ret < 0)
		return ret;

	prog = kzalloc(sizeof(*prog), GFP_KERNEL);
	if (!prog)
		return -ENOBUFS;

	tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);

	if (oldprog) {
		if (handle && oldprog->handle != handle) {
			ret = -EINVAL;
			goto errout;
		}
	}

	if (handle == 0)
		prog->handle = cls_bpf_grab_new_handle(tp, head);
	else
		prog->handle = handle;
	if (prog->handle == 0) {
		ret = -EINVAL;
		goto errout;
	}

	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
	if (ret < 0)
		goto errout;

	if (oldprog) {
		list_replace_rcu(&oldprog->link, &prog->link);
		tcf_unbind_filter(tp, &oldprog->res);
		call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
	} else {
		list_add_rcu(&prog->link, &head->plist);
	}

	*arg = (unsigned long) prog;
	return 0;
errout:
	kfree(prog);

	return ret;
}

static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
				 struct sk_buff *skb)
{
	struct nlattr *nla;

	if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
		return -EMSGSIZE;

	nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
			  sizeof(struct sock_filter));
	if (nla == NULL)
		return -EMSGSIZE;

	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));

	return 0;
}

static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
				  struct sk_buff *skb)
{
	if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
		return -EMSGSIZE;

	if (prog->bpf_name &&
	    nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
		return -EMSGSIZE;

	return 0;
}

static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
			struct sk_buff *skb, struct tcmsg *tm)
{
	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
	struct nlattr *nest;
	int ret;

	if (prog == NULL)
		return skb->len;

	tm->tcm_handle = prog->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
		goto nla_put_failure;

	if (cls_bpf_is_ebpf(prog))
		ret = cls_bpf_dump_ebpf_info(prog, skb);
	else
		ret = cls_bpf_dump_bpf_info(prog, skb);
	if (ret)
		goto nla_put_failure;

	if (tcf_exts_dump(skb, &prog->exts) < 0)
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct cls_bpf_head *head = rtnl_dereference(tp->root);
	struct cls_bpf_prog *prog;

	list_for_each_entry(prog, &head->plist, link) {
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
	.kind		= "bpf",
	.owner		= THIS_MODULE,
	.classify	= cls_bpf_classify,
	.init		= cls_bpf_init,
	.destroy	= cls_bpf_destroy,
	.get		= cls_bpf_get,
	.change		= cls_bpf_change,
	.delete		= cls_bpf_delete,
	.walk		= cls_bpf_walk,
	.dump		= cls_bpf_dump,
};

static int __init cls_bpf_init_mod(void)
{
	return register_tcf_proto_ops(&cls_bpf_ops);
}

static void __exit cls_bpf_exit_mod(void)
{
	unregister_tcf_proto_ops(&cls_bpf_ops);
}

module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);
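
/*
 * Illustrative sketch, not part of this module's code: a minimal eBPF
 * program of type BPF_PROG_TYPE_SCHED_CLS as consumed by cls_bpf_classify()
 * above. Without TCA_BPF_FLAG_ACT_DIRECT, the program's return value is
 * interpreted as follows: 0 means "no match, try the next filter", -1 means
 * "match, keep the classid configured via TCA_BPF_CLASSID", and any other
 * value is taken as the classid itself. The section name and the program
 * name cls_example below follow common iproute2/LLVM conventions and are
 * assumptions, not requirements of this classifier.
 *
 *	#include <linux/bpf.h>
 *
 *	__attribute__((section("classifier"), used))
 *	int cls_example(struct __sk_buff *skb)
 *	{
 *		return skb->len > 1280 ? -1 : 0;
 *	}
 *
 * This example matches packets longer than 1280 bytes (keeping the classid
 * configured from userspace) and lets everything else fall through to the
 * next filter. Such an object would typically be attached with iproute2,
 * roughly "tc filter add dev eth0 parent ffff: bpf obj cls_example.o
 * classid 1:1"; the exact syntax depends on the iproute2 version.
 */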