/*
 * Berkeley Packet Filter based traffic classifier
 *
 * Might be used to classify traffic through flexible, user-defined and
 * possibly JIT-ed BPF filters for traffic control as an alternative to
 * ematches.
 *
 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");

struct cls_bpf_head {
	struct list_head plist;
	u32 hgen;
};

struct cls_bpf_prog {
	struct sk_filter *filter;
	struct sock_filter *bpf_ops;
	struct tcf_exts exts;
	struct tcf_result res;
	struct list_head link;
	u32 handle;
	u16 bpf_len;
};

static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
	[TCA_BPF_OPS]		= { .type = NLA_BINARY,
				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

static const struct tcf_ext_map bpf_ext_map = {
	.action = TCA_BPF_ACT,
	.police = TCA_BPF_POLICE,
};

static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			    struct tcf_result *res)
{
	struct cls_bpf_head *head = tp->root;
	struct cls_bpf_prog *prog;
	int ret;

	list_for_each_entry(prog, &head->plist, link) {
		int filter_res = SK_RUN_FILTER(prog->filter, skb);

		if (filter_res == 0)
			continue;

		*res = prog->res;
		if (filter_res != -1)
			res->classid = filter_res;

		ret = tcf_exts_exec(skb, &prog->exts, res);
		if (ret < 0)
			continue;

		return ret;
	}

	return -1;
}

static int cls_bpf_init(struct tcf_proto *tp)
{
	struct cls_bpf_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;

	INIT_LIST_HEAD(&head->plist);
	tp->root = head;

	return 0;
}

static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
	tcf_unbind_filter(tp, &prog->res);
	tcf_exts_destroy(tp, &prog->exts);

	sk_unattached_filter_destroy(prog->filter);

	kfree(prog->bpf_ops);
	kfree(prog);
}

static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct cls_bpf_head *head = tp->root;
	struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;

	list_for_each_entry(prog, &head->plist, link) {
		if (prog == todel) {
			tcf_tree_lock(tp);
			list_del(&prog->link);
			tcf_tree_unlock(tp);

			cls_bpf_delete_prog(tp, prog);
			return 0;
		}
	}

	return -ENOENT;
}

static void cls_bpf_destroy(struct tcf_proto *tp)
{
	struct cls_bpf_head *head = tp->root;
	struct cls_bpf_prog *prog, *tmp;

	list_for_each_entry_safe(prog, tmp, &head->plist, link) {
		list_del(&prog->link);
		cls_bpf_delete_prog(tp, prog);
	}

	kfree(head);
}

static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
	struct cls_bpf_head *head = tp->root;
	struct cls_bpf_prog *prog;
	unsigned long ret = 0UL;

	if (head == NULL)
		return 0UL;

	list_for_each_entry(prog, &head->plist, link) {
		if (prog->handle == handle) {
			ret = (unsigned long) prog;
			break;
		}
	}

	return ret;
}

static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
{
}

static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
				   struct cls_bpf_prog *prog,
				   unsigned long base, struct nlattr **tb,
				   struct nlattr *est)
{
	struct sock_filter *bpf_ops, *bpf_old;
	struct tcf_exts exts;
	struct sock_fprog tmp;
	struct sk_filter *fp, *fp_old;
	u16 bpf_size, bpf_len;
	u32 classid;
	int ret;

	if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
		return -EINVAL;

	ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map);
	if (ret < 0)
		return ret;

	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
	bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
	if (bpf_len > BPF_MAXINSNS || bpf_len == 0) {
		ret = -EINVAL;
		goto errout;
	}

	bpf_size = bpf_len * sizeof(*bpf_ops);
	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
	if (bpf_ops == NULL) {
		ret = -ENOMEM;
		goto errout;
	}

	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);

	tmp.len = bpf_len;
	tmp.filter = (struct sock_filter __user *) bpf_ops;

	ret = sk_unattached_filter_create(&fp, &tmp);
	if (ret)
		goto errout_free;

	tcf_tree_lock(tp);
	fp_old = prog->filter;
	bpf_old = prog->bpf_ops;

	prog->bpf_len = bpf_len;
	prog->bpf_ops = bpf_ops;
	prog->filter = fp;
	prog->res.classid = classid;
	tcf_tree_unlock(tp);

	tcf_bind_filter(tp, &prog->res, base);
	tcf_exts_change(tp, &prog->exts, &exts);

	if (fp_old)
		sk_unattached_filter_destroy(fp_old);
	if (bpf_old)
		kfree(bpf_old);

	return 0;

errout_free:
	kfree(bpf_ops);
errout:
	tcf_exts_destroy(tp, &exts);
	return ret;
}

static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
				   struct cls_bpf_head *head)
{
	unsigned int i = 0x80000000;
	u32 handle;

	do {
		if (++head->hgen == 0x7FFFFFFF)
			head->hgen = 1;
	} while (--i > 0 && cls_bpf_get(tp, head->hgen));

	if (i == 0) {
		pr_err("Insufficient number of handles\n");
		handle = 0;
	} else {
		/* Return the generated handle, not the loop counter, so the
		 * value handed out is the one that was checked for collisions.
		 */
		handle = head->hgen;
	}

	return handle;
}

static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
			  struct tcf_proto *tp, unsigned long base,
			  u32 handle, struct nlattr **tca,
			  unsigned long *arg)
{
	struct cls_bpf_head *head = tp->root;
	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
	struct nlattr *tb[TCA_BPF_MAX + 1];
	int ret;

	if (tca[TCA_OPTIONS] == NULL)
		return -EINVAL;

	ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
	if (ret < 0)
		return ret;

	if (prog != NULL) {
		if (handle && prog->handle != handle)
			return -EINVAL;
		return cls_bpf_modify_existing(net, tp, prog, base, tb,
					       tca[TCA_RATE]);
	}

	prog = kzalloc(sizeof(*prog), GFP_KERNEL);
	if (prog == NULL)
		return -ENOBUFS;

	if (handle == 0)
		prog->handle = cls_bpf_grab_new_handle(tp, head);
	else
		prog->handle = handle;
	if (prog->handle == 0) {
		ret = -EINVAL;
		goto errout;
	}

	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]);
	if (ret < 0)
		goto errout;

	tcf_tree_lock(tp);
	list_add(&prog->link, &head->plist);
	tcf_tree_unlock(tp);

	*arg = (unsigned long) prog;

	return 0;
errout:
	if (*arg == 0UL && prog)
		kfree(prog);

	return ret;
}

static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh,
			struct sk_buff *skb, struct tcmsg *tm)
{
	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
	struct nlattr *nest, *nla;

	if (prog == NULL)
		return skb->len;

	tm->tcm_handle = prog->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
		goto nla_put_failure;
	if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len))
		goto nla_put_failure;

	nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len *
			  sizeof(struct sock_filter));
	if (nla == NULL)
		goto nla_put_failure;

	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));

	if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0)
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct cls_bpf_head *head = tp->root;
	struct cls_bpf_prog *prog;

	list_for_each_entry(prog, &head->plist, link) {
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
	.kind		= "bpf",
	.owner		= THIS_MODULE,
	.classify	= cls_bpf_classify,
	.init		= cls_bpf_init,
	.destroy	= cls_bpf_destroy,
	.get		= cls_bpf_get,
	.put		= cls_bpf_put,
	.change		= cls_bpf_change,
	.delete		= cls_bpf_delete,
	.walk		= cls_bpf_walk,
	.dump		= cls_bpf_dump,
};

static int __init cls_bpf_init_mod(void)
{
	return register_tcf_proto_ops(&cls_bpf_ops);
}

static void __exit cls_bpf_exit_mod(void)
{
	unregister_tcf_proto_ops(&cls_bpf_ops);
}

module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);
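
/*
 * Example (illustration only, not part of this module): a minimal sketch
 * of the kind of classic BPF program userspace could supply through the
 * TCA_BPF_OPS / TCA_BPF_OPS_LEN attributes parsed above; the tc(8) front
 * end normally builds this array for you. The array name "classify_ipv4"
 * is made up for the example. The return value follows the convention
 * checked in cls_bpf_classify(): 0 means "no match, try the next program",
 * -1 (0xffffffff) means "match, use the classid configured through
 * TCA_BPF_CLASSID", and any other value is taken as the classid directly.
 */
#if 0	/* userspace illustration, kept out of the kernel build */
#include <linux/filter.h>
#include <linux/if_ether.h>

static const struct sock_filter classify_ipv4[] = {
	/* A = skb->protocol (BPF ancillary load, host byte order) */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_PROTOCOL),
	/* if (A != ETH_P_IP) skip the next instruction */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
	/* IPv4: return -1, i.e. match and use the configured classid */
	BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
	/* anything else: return 0, i.e. no match */
	BPF_STMT(BPF_RET | BPF_K, 0),
};
#endif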