// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata), true);

	return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;
	bool free_sk_storage = false;

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage) {
		rcu_read_unlock();
		return;
	}

	raw_spin_lock_bh(&sk_storage->lock);
	free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage);
	raw_spin_unlock_bh(&sk_storage->lock);
	rcu_read_unlock();

	if (free_sk_storage)
		kfree_rcu(sk_storage, rcu);
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}
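/* Usage sketch (illustration only): the fd-based map ops below back the
 * bpf(2) syscall path for a BPF_MAP_TYPE_SK_STORAGE map, where the key is
 * a socket file descriptor owned by the calling process.  With the usual
 * libbpf wrappers and a hypothetical value struct, userspace access looks
 * roughly like:
 *
 *	struct my_storage val = {};
 *	int sk_fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *	bpf_map_update_elem(map_fd, &sk_fd, &val, BPF_ANY);
 *	bpf_map_lookup_elem(map_fd, &sk_fd, &val);
 *	bpf_map_delete_elem(map_fd, &sk_fd);
 *
 * Iterating over all keys is not supported, hence notsupp_get_next_key()
 * above.
 */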
static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}

static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}

static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;

	if (map_value_has_spin_lock(&smap->map))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding new element
		 * here can race with cleanup in bpf_local_storage_map_free.
		 * Try to grab map refcnt to make sure that it's still
		 * alive and prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				kfree(copy_selem);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();

	/* In case of an error, don't free anything explicitly here, the
	 * caller is responsible to call bpf_sk_storage_free.
	 */

	return ret;
}
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add new elem to a going away sk.
	     * Otherwise, the new elem may become a leak
	     * (and also other memory issues during map
	     * destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, gfp_flags);
		/* sk must be a fullsock (guaranteed by verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	int optmem_max = READ_ONCE(sysctl_optmem_max);
	struct sock *sk = (struct sock *)owner;

	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &sk_storage_map_btf_ids[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func = bpf_sk_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
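/* Usage sketch (illustration only): from a BPF program, a sk_storage map is
 * declared as a BTF-defined map and accessed through the helpers whose protos
 * are defined above.  "sk_stg_map" and "struct my_storage" are hypothetical
 * names; the pattern is roughly:
 *
 *	struct my_storage {
 *		__u64 val;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, struct my_storage);
 *	} sk_stg_map SEC(".maps");
 *
 *	struct my_storage *stg;
 *
 *	stg = bpf_sk_storage_get(&sk_stg_map, sk, NULL,
 *				 BPF_SK_STORAGE_GET_F_CREATE);
 *	if (stg)
 *		stg->val++;
 *	bpf_sk_storage_delete(&sk_stg_map, sk);
 */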
static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	const struct btf *btf_vmlinux;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;

	if (prog->aux->dst_prog)
		return false;

	/* Ensure the tracing program is not both tracing
	 * a bpf_sk_storage*() function and using the
	 * bpf_sk_storage_(get|delete) helpers.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no trace point */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		btf_vmlinux = bpf_get_btf_vmlinux();
		if (IS_ERR_OR_NULL(btf_vmlinux))
			return false;
		btf_id = prog->aux->attach_btf_id;
		t = btf_type_by_id(btf_vmlinux, btf_id);
		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
		return !!strncmp(tname, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
						     gfp_flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func = bpf_sk_storage_get_tracing,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
	.allowed = bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func = bpf_sk_storage_delete_tracing,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed = bpf_sk_storage_tracing_allowed,
};
struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(value_size);
}

void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
	 * the map_alloc_check() side also does.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
			nr_maps++;
	}

	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		struct bpf_map *map;
		int map_fd;

		if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
			continue;

		map_fd = nla_get_u32(nla);
		map = bpf_map_get(map_fd);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* It cannot exceed max nlattr's payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (map_value_has_spin_lock(&smap->map))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}
static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified. Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned skip_elems;
};

static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)
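/* Usage sketch (illustration only): a BPF iterator program attaches to the
 * "bpf_sk_storage_map" target registered at the bottom of this file and
 * receives the context declared above.  The program and variable names are
 * hypothetical; the pattern is roughly:
 *
 *	long seen;
 *
 *	SEC("iter/bpf_sk_storage_map")
 *	int dump_sk_storage(struct bpf_iter__bpf_sk_storage_map *ctx)
 *	{
 *		if (!ctx->sk || !ctx->value)
 *			return 0;
 *		__sync_fetch_and_add(&seen, 1);
 *		return 0;
 *	}
 */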
static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}

static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start = bpf_sk_storage_map_seq_start,
	.next = bpf_sk_storage_map_seq_next,
	.stop = bpf_sk_storage_map_seq_stop,
	.show = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_sk_storage_map_seq_ops,
	.init_seq_private = bpf_iter_init_sk_storage_map,
	.fini_seq_private = bpf_iter_fini_sk_storage_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target = "bpf_sk_storage_map",
	.attach_target = bpf_iter_attach_map,
	.detach_target = bpf_iter_detach_map,
	.show_fdinfo = bpf_iter_map_show_fdinfo,
	.fill_link_info = bpf_iter_map_fill_link_info,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info = &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);