1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/bpf.h> 4 #include <linux/filter.h> 5 #include <net/net_namespace.h> 6 7 /* 8 * Functions to manage BPF programs attached to netns 9 */ 10 11 struct bpf_netns_link { 12 struct bpf_link link; 13 enum bpf_attach_type type; 14 enum netns_bpf_attach_type netns_type; 15 16 /* We don't hold a ref to net in order to auto-detach the link 17 * when netns is going away. Instead we rely on pernet 18 * pre_exit callback to clear this pointer. Must be accessed 19 * with netns_bpf_mutex held. 20 */ 21 struct net *net; 22 struct list_head node; /* node in list of links attached to net */ 23 }; 24 25 /* Protects updates to netns_bpf */ 26 DEFINE_MUTEX(netns_bpf_mutex); 27 28 static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type) 29 { 30 switch (type) { 31 #ifdef CONFIG_INET 32 case NETNS_BPF_SK_LOOKUP: 33 static_branch_dec(&bpf_sk_lookup_enabled); 34 break; 35 #endif 36 default: 37 break; 38 } 39 } 40 41 static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type) 42 { 43 switch (type) { 44 #ifdef CONFIG_INET 45 case NETNS_BPF_SK_LOOKUP: 46 static_branch_inc(&bpf_sk_lookup_enabled); 47 break; 48 #endif 49 default: 50 break; 51 } 52 } 53 54 /* Must be called with netns_bpf_mutex held. */ 55 static void netns_bpf_run_array_detach(struct net *net, 56 enum netns_bpf_attach_type type) 57 { 58 struct bpf_prog_array *run_array; 59 60 run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL, 61 lockdep_is_held(&netns_bpf_mutex)); 62 bpf_prog_array_free(run_array); 63 } 64 65 static int link_index(struct net *net, enum netns_bpf_attach_type type, 66 struct bpf_netns_link *link) 67 { 68 struct bpf_netns_link *pos; 69 int i = 0; 70 71 list_for_each_entry(pos, &net->bpf.links[type], node) { 72 if (pos == link) 73 return i; 74 i++; 75 } 76 return -ENOENT; 77 } 78 79 static int link_count(struct net *net, enum netns_bpf_attach_type type) 80 { 81 struct list_head *pos; 82 int i = 0; 83 84 list_for_each(pos, &net->bpf.links[type]) 85 i++; 86 return i; 87 } 88 89 static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type, 90 struct bpf_prog_array *prog_array) 91 { 92 struct bpf_netns_link *pos; 93 unsigned int i = 0; 94 95 list_for_each_entry(pos, &net->bpf.links[type], node) { 96 prog_array->items[i].prog = pos->link.prog; 97 i++; 98 } 99 } 100 101 static void bpf_netns_link_release(struct bpf_link *link) 102 { 103 struct bpf_netns_link *net_link = 104 container_of(link, struct bpf_netns_link, link); 105 enum netns_bpf_attach_type type = net_link->netns_type; 106 struct bpf_prog_array *old_array, *new_array; 107 struct net *net; 108 int cnt, idx; 109 110 mutex_lock(&netns_bpf_mutex); 111 112 /* We can race with cleanup_net, but if we see a non-NULL 113 * struct net pointer, pre_exit has not run yet and wait for 114 * netns_bpf_mutex. 115 */ 116 net = net_link->net; 117 if (!net) 118 goto out_unlock; 119 120 /* Mark attach point as unused */ 121 netns_bpf_attach_type_unneed(type); 122 123 /* Remember link position in case of safe delete */ 124 idx = link_index(net, type, net_link); 125 list_del(&net_link->node); 126 127 cnt = link_count(net, type); 128 if (!cnt) { 129 netns_bpf_run_array_detach(net, type); 130 goto out_unlock; 131 } 132 133 old_array = rcu_dereference_protected(net->bpf.run_array[type], 134 lockdep_is_held(&netns_bpf_mutex)); 135 new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL); 136 if (!new_array) { 137 WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx)); 138 goto out_unlock; 139 } 140 fill_prog_array(net, type, new_array); 141 rcu_assign_pointer(net->bpf.run_array[type], new_array); 142 bpf_prog_array_free(old_array); 143 144 out_unlock: 145 mutex_unlock(&netns_bpf_mutex); 146 } 147 148 static void bpf_netns_link_dealloc(struct bpf_link *link) 149 { 150 struct bpf_netns_link *net_link = 151 container_of(link, struct bpf_netns_link, link); 152 153 kfree(net_link); 154 } 155 156 static int bpf_netns_link_update_prog(struct bpf_link *link, 157 struct bpf_prog *new_prog, 158 struct bpf_prog *old_prog) 159 { 160 struct bpf_netns_link *net_link = 161 container_of(link, struct bpf_netns_link, link); 162 enum netns_bpf_attach_type type = net_link->netns_type; 163 struct bpf_prog_array *run_array; 164 struct net *net; 165 int idx, ret; 166 167 if (old_prog && old_prog != link->prog) 168 return -EPERM; 169 if (new_prog->type != link->prog->type) 170 return -EINVAL; 171 172 mutex_lock(&netns_bpf_mutex); 173 174 net = net_link->net; 175 if (!net || !check_net(net)) { 176 /* Link auto-detached or netns dying */ 177 ret = -ENOLINK; 178 goto out_unlock; 179 } 180 181 run_array = rcu_dereference_protected(net->bpf.run_array[type], 182 lockdep_is_held(&netns_bpf_mutex)); 183 idx = link_index(net, type, net_link); 184 ret = bpf_prog_array_update_at(run_array, idx, new_prog); 185 if (ret) 186 goto out_unlock; 187 188 old_prog = xchg(&link->prog, new_prog); 189 bpf_prog_put(old_prog); 190 191 out_unlock: 192 mutex_unlock(&netns_bpf_mutex); 193 return ret; 194 } 195 196 static int bpf_netns_link_fill_info(const struct bpf_link *link, 197 struct bpf_link_info *info) 198 { 199 const struct bpf_netns_link *net_link = 200 container_of(link, struct bpf_netns_link, link); 201 unsigned int inum = 0; 202 struct net *net; 203 204 mutex_lock(&netns_bpf_mutex); 205 net = net_link->net; 206 if (net && check_net(net)) 207 inum = net->ns.inum; 208 mutex_unlock(&netns_bpf_mutex); 209 210 info->netns.netns_ino = inum; 211 info->netns.attach_type = net_link->type; 212 return 0; 213 } 214 215 static void bpf_netns_link_show_fdinfo(const struct bpf_link *link, 216 struct seq_file *seq) 217 { 218 struct bpf_link_info info = {}; 219 220 bpf_netns_link_fill_info(link, &info); 221 seq_printf(seq, 222 "netns_ino:\t%u\n" 223 "attach_type:\t%u\n", 224 info.netns.netns_ino, 225 info.netns.attach_type); 226 } 227 228 static const struct bpf_link_ops bpf_netns_link_ops = { 229 .release = bpf_netns_link_release, 230 .dealloc = bpf_netns_link_dealloc, 231 .update_prog = bpf_netns_link_update_prog, 232 .fill_link_info = bpf_netns_link_fill_info, 233 .show_fdinfo = bpf_netns_link_show_fdinfo, 234 }; 235 236 /* Must be called with netns_bpf_mutex held. */ 237 static int __netns_bpf_prog_query(const union bpf_attr *attr, 238 union bpf_attr __user *uattr, 239 struct net *net, 240 enum netns_bpf_attach_type type) 241 { 242 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 243 struct bpf_prog_array *run_array; 244 u32 prog_cnt = 0, flags = 0; 245 246 run_array = rcu_dereference_protected(net->bpf.run_array[type], 247 lockdep_is_held(&netns_bpf_mutex)); 248 if (run_array) 249 prog_cnt = bpf_prog_array_length(run_array); 250 251 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 252 return -EFAULT; 253 if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 254 return -EFAULT; 255 if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 256 return 0; 257 258 return bpf_prog_array_copy_to_user(run_array, prog_ids, 259 attr->query.prog_cnt); 260 } 261 262 int netns_bpf_prog_query(const union bpf_attr *attr, 263 union bpf_attr __user *uattr) 264 { 265 enum netns_bpf_attach_type type; 266 struct net *net; 267 int ret; 268 269 if (attr->query.query_flags) 270 return -EINVAL; 271 272 type = to_netns_bpf_attach_type(attr->query.attach_type); 273 if (type < 0) 274 return -EINVAL; 275 276 net = get_net_ns_by_fd(attr->query.target_fd); 277 if (IS_ERR(net)) 278 return PTR_ERR(net); 279 280 mutex_lock(&netns_bpf_mutex); 281 ret = __netns_bpf_prog_query(attr, uattr, net, type); 282 mutex_unlock(&netns_bpf_mutex); 283 284 put_net(net); 285 return ret; 286 } 287 288 int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) 289 { 290 struct bpf_prog_array *run_array; 291 enum netns_bpf_attach_type type; 292 struct bpf_prog *attached; 293 struct net *net; 294 int ret; 295 296 if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd) 297 return -EINVAL; 298 299 type = to_netns_bpf_attach_type(attr->attach_type); 300 if (type < 0) 301 return -EINVAL; 302 303 net = current->nsproxy->net_ns; 304 mutex_lock(&netns_bpf_mutex); 305 306 /* Attaching prog directly is not compatible with links */ 307 if (!list_empty(&net->bpf.links[type])) { 308 ret = -EEXIST; 309 goto out_unlock; 310 } 311 312 switch (type) { 313 case NETNS_BPF_FLOW_DISSECTOR: 314 ret = flow_dissector_bpf_prog_attach_check(net, prog); 315 break; 316 default: 317 ret = -EINVAL; 318 break; 319 } 320 if (ret) 321 goto out_unlock; 322 323 attached = net->bpf.progs[type]; 324 if (attached == prog) { 325 /* The same program cannot be attached twice */ 326 ret = -EINVAL; 327 goto out_unlock; 328 } 329 330 run_array = rcu_dereference_protected(net->bpf.run_array[type], 331 lockdep_is_held(&netns_bpf_mutex)); 332 if (run_array) { 333 WRITE_ONCE(run_array->items[0].prog, prog); 334 } else { 335 run_array = bpf_prog_array_alloc(1, GFP_KERNEL); 336 if (!run_array) { 337 ret = -ENOMEM; 338 goto out_unlock; 339 } 340 run_array->items[0].prog = prog; 341 rcu_assign_pointer(net->bpf.run_array[type], run_array); 342 } 343 344 net->bpf.progs[type] = prog; 345 if (attached) 346 bpf_prog_put(attached); 347 348 out_unlock: 349 mutex_unlock(&netns_bpf_mutex); 350 351 return ret; 352 } 353 354 /* Must be called with netns_bpf_mutex held. */ 355 static int __netns_bpf_prog_detach(struct net *net, 356 enum netns_bpf_attach_type type, 357 struct bpf_prog *old) 358 { 359 struct bpf_prog *attached; 360 361 /* Progs attached via links cannot be detached */ 362 if (!list_empty(&net->bpf.links[type])) 363 return -EINVAL; 364 365 attached = net->bpf.progs[type]; 366 if (!attached || attached != old) 367 return -ENOENT; 368 netns_bpf_run_array_detach(net, type); 369 net->bpf.progs[type] = NULL; 370 bpf_prog_put(attached); 371 return 0; 372 } 373 374 int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 375 { 376 enum netns_bpf_attach_type type; 377 struct bpf_prog *prog; 378 int ret; 379 380 if (attr->target_fd) 381 return -EINVAL; 382 383 type = to_netns_bpf_attach_type(attr->attach_type); 384 if (type < 0) 385 return -EINVAL; 386 387 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 388 if (IS_ERR(prog)) 389 return PTR_ERR(prog); 390 391 mutex_lock(&netns_bpf_mutex); 392 ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog); 393 mutex_unlock(&netns_bpf_mutex); 394 395 bpf_prog_put(prog); 396 397 return ret; 398 } 399 400 static int netns_bpf_max_progs(enum netns_bpf_attach_type type) 401 { 402 switch (type) { 403 case NETNS_BPF_FLOW_DISSECTOR: 404 return 1; 405 case NETNS_BPF_SK_LOOKUP: 406 return 64; 407 default: 408 return 0; 409 } 410 } 411 412 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, 413 enum netns_bpf_attach_type type) 414 { 415 struct bpf_netns_link *net_link = 416 container_of(link, struct bpf_netns_link, link); 417 struct bpf_prog_array *run_array; 418 int cnt, err; 419 420 mutex_lock(&netns_bpf_mutex); 421 422 cnt = link_count(net, type); 423 if (cnt >= netns_bpf_max_progs(type)) { 424 err = -E2BIG; 425 goto out_unlock; 426 } 427 /* Links are not compatible with attaching prog directly */ 428 if (net->bpf.progs[type]) { 429 err = -EEXIST; 430 goto out_unlock; 431 } 432 433 switch (type) { 434 case NETNS_BPF_FLOW_DISSECTOR: 435 err = flow_dissector_bpf_prog_attach_check(net, link->prog); 436 break; 437 case NETNS_BPF_SK_LOOKUP: 438 err = 0; /* nothing to check */ 439 break; 440 default: 441 err = -EINVAL; 442 break; 443 } 444 if (err) 445 goto out_unlock; 446 447 run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL); 448 if (!run_array) { 449 err = -ENOMEM; 450 goto out_unlock; 451 } 452 453 list_add_tail(&net_link->node, &net->bpf.links[type]); 454 455 fill_prog_array(net, type, run_array); 456 run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array, 457 lockdep_is_held(&netns_bpf_mutex)); 458 bpf_prog_array_free(run_array); 459 460 /* Mark attach point as used */ 461 netns_bpf_attach_type_need(type); 462 463 out_unlock: 464 mutex_unlock(&netns_bpf_mutex); 465 return err; 466 } 467 468 int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog) 469 { 470 enum netns_bpf_attach_type netns_type; 471 struct bpf_link_primer link_primer; 472 struct bpf_netns_link *net_link; 473 enum bpf_attach_type type; 474 struct net *net; 475 int err; 476 477 if (attr->link_create.flags) 478 return -EINVAL; 479 480 type = attr->link_create.attach_type; 481 netns_type = to_netns_bpf_attach_type(type); 482 if (netns_type < 0) 483 return -EINVAL; 484 485 net = get_net_ns_by_fd(attr->link_create.target_fd); 486 if (IS_ERR(net)) 487 return PTR_ERR(net); 488 489 net_link = kzalloc(sizeof(*net_link), GFP_USER); 490 if (!net_link) { 491 err = -ENOMEM; 492 goto out_put_net; 493 } 494 bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS, 495 &bpf_netns_link_ops, prog); 496 net_link->net = net; 497 net_link->type = type; 498 net_link->netns_type = netns_type; 499 500 err = bpf_link_prime(&net_link->link, &link_primer); 501 if (err) { 502 kfree(net_link); 503 goto out_put_net; 504 } 505 506 err = netns_bpf_link_attach(net, &net_link->link, netns_type); 507 if (err) { 508 bpf_link_cleanup(&link_primer); 509 goto out_put_net; 510 } 511 512 put_net(net); 513 return bpf_link_settle(&link_primer); 514 515 out_put_net: 516 put_net(net); 517 return err; 518 } 519 520 static int __net_init netns_bpf_pernet_init(struct net *net) 521 { 522 int type; 523 524 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) 525 INIT_LIST_HEAD(&net->bpf.links[type]); 526 527 return 0; 528 } 529 530 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) 531 { 532 enum netns_bpf_attach_type type; 533 struct bpf_netns_link *net_link; 534 535 mutex_lock(&netns_bpf_mutex); 536 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { 537 netns_bpf_run_array_detach(net, type); 538 list_for_each_entry(net_link, &net->bpf.links[type], node) { 539 net_link->net = NULL; /* auto-detach link */ 540 netns_bpf_attach_type_unneed(type); 541 } 542 if (net->bpf.progs[type]) 543 bpf_prog_put(net->bpf.progs[type]); 544 } 545 mutex_unlock(&netns_bpf_mutex); 546 } 547 548 static struct pernet_operations netns_bpf_pernet_ops __net_initdata = { 549 .init = netns_bpf_pernet_init, 550 .pre_exit = netns_bpf_pernet_pre_exit, 551 }; 552 553 static int __init netns_bpf_init(void) 554 { 555 return register_pernet_subsys(&netns_bpf_pernet_ops); 556 } 557 558 subsys_initcall(netns_bpf_init); 559