// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata), true);

	return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;
	bool free_sk_storage = false;

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage) {
		rcu_read_unlock();
		return;
	}

	raw_spin_lock_bh(&sk_storage->lock);
	free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage);
	raw_spin_unlock_bh(&sk_storage->lock);
	rcu_read_unlock();

	if (free_sk_storage)
		kfree_rcu(sk_storage, rcu);
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}

static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}

static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}
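/* Illustrative sketch (not part of the original file): from user space, the
 * fd-based element ops above are reached through the regular bpf() map
 * syscalls, with the *key* being a socket file descriptor. Assuming libbpf
 * and an already created BPF_MAP_TYPE_SK_STORAGE map, usage could look like:
 *
 *	struct my_val v = { .cnt = 1 };		// hypothetical value type
 *
 *	bpf_map_update_elem(map_fd, &sock_fd, &v, BPF_ANY);
 *	bpf_map_lookup_elem(map_fd, &sock_fd, &v);
 *	bpf_map_delete_elem(map_fd, &sock_fd);
 *
 * bpf_map_get_next_key() is not supported for this map type (see
 * notsupp_get_next_key() above).
 */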
static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding new element
		 * here can race with cleanup in bpf_local_storage_map_free.
		 * Try to grab map refcnt to make sure that it's still
		 * alive and prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				kfree(copy_selem);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();

	/* In case of an error, don't free anything explicitly here, the
	 * caller is responsible to call bpf_sk_storage_free.
	 */

	return ret;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add new elem to a going away sk.
	     * Otherwise, the new elem may become a leak
	     * (and also other memory issues during map
	     * destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, gfp_flags);
		/* sk must be a fullsock (guaranteed by verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	int optmem_max = READ_ONCE(sysctl_optmem_max);
	struct sock *sk = (struct sock *)owner;

	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func = bpf_sk_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
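/* Illustrative sketch (not part of the original file): from a BPF program,
 * the helpers above are typically used against a BPF_MAP_TYPE_SK_STORAGE
 * map declared with libbpf conventions. The map name and value type below
 * are made up for the example:
 *
 *	struct my_val {				// hypothetical value layout
 *		__u64 pkts;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, struct my_val);
 *	} sk_stg SEC(".maps");
 *
 *	// e.g. in a program type that has a socket pointer available:
 *	struct my_val *v;
 *
 *	v = bpf_sk_storage_get(&sk_stg, sk, NULL,
 *			       BPF_SK_STORAGE_GET_F_CREATE);
 *	if (v)
 *		__sync_fetch_and_add(&v->pkts, 1);
 *	...
 *	bpf_sk_storage_delete(&sk_stg, sk);
 */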
static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	const struct btf *btf_vmlinux;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;

	if (prog->aux->dst_prog)
		return false;

	/* Ensure the tracing program is not tracing
	 * any bpf_sk_storage*() function while also
	 * using the bpf_sk_storage_(get|delete) helpers.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no trace point */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		btf_vmlinux = bpf_get_btf_vmlinux();
		if (IS_ERR_OR_NULL(btf_vmlinux))
			return false;
		btf_id = prog->aux->attach_btf_id;
		t = btf_type_by_id(btf_vmlinux, btf_id);
		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
		return !!strncmp(tname, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
						     gfp_flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func = bpf_sk_storage_get_tracing,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
	.allowed = bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func = bpf_sk_storage_delete_tracing,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed = bpf_sk_storage_tracing_allowed,
};

struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(value_size);
}
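/* Illustrative note (not part of the original file): the request that
 * triggers this reply is expected to carry a nested list of map fds,
 * matching what bpf_sk_storage_diag_alloc() below parses, e.g.:
 *
 * INET_DIAG_REQ_SK_BPF_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32)
 *	SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32)
 *	....
 *
 * An empty nest (no map fd specified) asks for all sk storages attached to
 * the socket to be dumped (see bpf_sk_storage_diag_put() below).
 */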
void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
	 * the map_alloc_check() side also does.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
			nr_maps++;
	}

	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		struct bpf_map *map;
		int map_fd;

		if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
			continue;

		map_fd = nla_get_u32(nla);
		map = bpf_map_get(map_fd);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* It cannot exceed max nlattr's payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}
static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified. Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);

struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned skip_elems;
};

static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)

static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}

static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start = bpf_sk_storage_map_seq_start,
	.next = bpf_sk_storage_map_seq_next,
	.stop = bpf_sk_storage_map_seq_stop,
	.show = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_sk_storage_map_seq_ops,
	.init_seq_private = bpf_iter_init_sk_storage_map,
	.fini_seq_private = bpf_iter_fini_sk_storage_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target = "bpf_sk_storage_map",
	.attach_target = bpf_iter_attach_map,
	.detach_target = bpf_iter_detach_map,
	.show_fdinfo = bpf_iter_map_show_fdinfo,
	.fill_link_info = bpf_iter_map_fill_link_info,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info = &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);
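/* Illustrative sketch (not part of the original file): the iterator target
 * registered above can be walked by a BPF program attached as
 * SEC("iter/bpf_sk_storage_map"). The program and value names below are
 * made up for the example; the ctx layout follows
 * struct bpf_iter__bpf_sk_storage_map:
 *
 *	SEC("iter/bpf_sk_storage_map")
 *	int dump_sk_stg(struct bpf_iter__bpf_sk_storage_map *ctx)
 *	{
 *		struct sock *sk = ctx->sk;
 *		struct my_val *v = ctx->value;	// hypothetical value type
 *
 *		if (!sk || !v)
 *			return 0;
 *		BPF_SEQ_PRINTF(ctx->meta->seq, "pkts: %llu\n", v->pkts);
 *		return 0;
 *	}
 *
 * The iterator link is created against a specific sk_storage map fd (see
 * bpf_iter_attach_map() above), e.g. by passing the map fd in
 * union bpf_iter_link_info when creating the link.
 */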