/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/vmalloc.h>
#include "percpu_freelist.h"

struct bucket {
	struct hlist_head head;
	raw_spinlock_t lock;
};

struct bpf_htab {
	struct bpf_map map;
	struct bucket *buckets;
	void *elems;
	struct pcpu_freelist freelist;
	void __percpu *extra_elems;
	atomic_t count;	/* number of elements in this hashtable */
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
};

enum extra_elem_state {
	HTAB_NOT_AN_EXTRA_ELEM = 0,
	HTAB_EXTRA_ELEM_FREE,
	HTAB_EXTRA_ELEM_USED
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_node hash_node;
		struct bpf_htab *htab;
		struct pcpu_freelist_node fnode;
	};
	union {
		struct rcu_head rcu;
		enum extra_elem_state state;
	};
	u32 hash;
	char key[0] __aligned(8);
};

static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
}

static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
	}
free_elems:
	vfree(htab->elems);
}

static int prealloc_elems_and_freelist(struct bpf_htab *htab)
{
	int err = -ENOMEM, i;

	htab->elems = vzalloc(htab->elem_size * htab->map.max_entries);
	if (!htab->elems)
		return -ENOMEM;

	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
		goto skip_percpu_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
	}

skip_percpu_elems:
	err = pcpu_freelist_init(&htab->freelist);
	if (err)
		goto free_elems;

	pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
			       htab->map.max_entries);
	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}

static int alloc_extra_elems(struct bpf_htab *htab)
{
	void __percpu *pptr;
	int cpu;

	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
			HTAB_EXTRA_ELEM_FREE;
	}
	htab->extra_elems = pptr;
	return 0;
}

/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH;
	struct bpf_htab *htab;
	int err, i;
	u64 cost;

	if (attr->map_flags & ~BPF_F_NO_PREALLOC)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	htab = kzalloc(sizeof(*htab), GFP_USER);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	/* mandatory map attributes */
	htab->map.map_type = attr->map_type;
	htab->map.key_size = attr->key_size;
	htab->map.value_size = attr->value_size;
	htab->map.max_entries = attr->max_entries;
	htab->map.map_flags = attr->map_flags;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	err = -EINVAL;
	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
	    htab->map.value_size == 0)
		goto free_htab;

	/* hash table size must be power of 2 */
	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	err = -E2BIG;
	if (htab->map.key_size > MAX_BPF_STACK)
		/* eBPF programs initialize keys on stack, so they cannot be
		 * larger than max stack size
		 */
		goto free_htab;

	if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
	    MAX_BPF_STACK - sizeof(struct htab_elem))
		/* if value_size is bigger, the user space won't be able to
		 * access the elements via bpf syscall. This check also makes
		 * sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		goto free_htab;

	if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
		/* make sure the size for pcpu_alloc() is reasonable */
		goto free_htab;

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	/* prevent zero size kmalloc and check for u32 overflow */
	if (htab->n_buckets == 0 ||
	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
	       (u64) htab->elem_size * htab->map.max_entries;

	if (percpu)
		cost += (u64) round_up(htab->map.value_size, 8) *
			num_possible_cpus() * htab->map.max_entries;
	else
		cost += (u64) htab->elem_size * num_possible_cpus();

	if (cost >= U32_MAX - PAGE_SIZE)
		/* make sure page count doesn't overflow */
		goto free_htab;

	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	/* if map size is larger than memlock limit, reject it early */
	err = bpf_map_precharge_memlock(htab->map.pages);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket),
				      GFP_USER | __GFP_NOWARN);

	if (!htab->buckets) {
		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket));
		if (!htab->buckets)
			goto free_htab;
	}

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_HEAD(&htab->buckets[i].head);
		raw_spin_lock_init(&htab->buckets[i].lock);
	}

	if (!percpu) {
		err = alloc_extra_elems(htab);
		if (err)
			goto free_buckets;
	}

	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
		err = prealloc_elems_and_freelist(htab);
		if (err)
			goto free_extra_elems;
	}

	return &htab->map;

free_extra_elems:
	free_percpu(htab->extra_elems);
free_buckets:
	kvfree(htab->buckets);
free_htab:
	kfree(htab);
	return ERR_PTR(err);
}

static inline u32 htab_map_hash(const void *key, u32 key_len)
{
	return jhash(key, key_len, 0);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}

static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct htab_elem *l;

	hlist_for_each_entry_rcu(l, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* Called from syscall or from eBPF program */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	/* Must be called with rcu_read_lock. */
	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	head = select_bucket(htab, hash);

	l = lookup_elem_raw(head, hash, key, key_size);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_elem_raw(head, hash, key, key_size);

	if (!l) {
		i = 0;
		goto find_first_elem;
	}

	/* key was found, get next key in the same bucket */
	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
				  struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
					  struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}

static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
	kfree(l);
}

static void htab_elem_free_rcu(struct rcu_head *head)
{
	struct htab_elem *l = container_of(head, struct htab_elem, rcu);
	struct bpf_htab *htab = l->htab;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering while
	 * we're calling kfree, otherwise deadlock is possible if kprobes
	 * are placed somewhere inside of slub
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	htab_elem_free(htab, l);
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
}

static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	if (l->state == HTAB_EXTRA_ELEM_USED) {
		l->state = HTAB_EXTRA_ELEM_FREE;
		return;
	}

	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
		pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		atomic_dec(&htab->count);
		l->htab = htab;
		call_rcu(&l->rcu, htab_elem_free_rcu);
	}
}

static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					 void *value, u32 key_size, u32 hash,
					 bool percpu, bool onallcpus,
					 bool old_elem_exists)
{
	u32 size = htab->map.value_size;
	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
	struct htab_elem *l_new;
	void __percpu *pptr;
	int err = 0;

	if (prealloc) {
		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
		if (!l_new)
			err = -E2BIG;
	} else {
		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
			atomic_dec(&htab->count);
			err = -E2BIG;
		} else {
			l_new = kmalloc(htab->elem_size,
					GFP_ATOMIC | __GFP_NOWARN);
			if (!l_new)
				return ERR_PTR(-ENOMEM);
		}
	}

	if (err) {
		if (!old_elem_exists)
			return ERR_PTR(err);

		/* if we're updating the existing element and the hash table
		 * is full, use per-cpu extra elems
		 */
		l_new = this_cpu_ptr(htab->extra_elems);
		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
			return ERR_PTR(-E2BIG);
		l_new->state = HTAB_EXTRA_ELEM_USED;
	} else {
		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
	}

	memcpy(l_new->key, key, key_size);
	if (percpu) {
		/* round up value_size to 8 bytes */
		size = round_up(size, 8);

		if (prealloc) {
			pptr = htab_elem_get_ptr(l_new, key_size);
		} else {
			/* alloc_percpu zero-fills */
			pptr = __alloc_percpu_gfp(size, 8,
						  GFP_ATOMIC | __GFP_NOWARN);
			if (!pptr) {
				kfree(l_new);
				return ERR_PTR(-ENOMEM);
			}
		}

		if (!onallcpus) {
			/* copy true value_size bytes */
			memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
		} else {
			int off = 0, cpu;

			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
						value + off, size);
				off += size;
			}
		}
		if (!prealloc)
			htab_elem_set_ptr(l_new, key_size, pptr);
	} else {
		memcpy(l_new->key + round_up(key_size, 8), value, size);
	}

	l_new->hash = hash;
	return l_new;
}

static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
		       u64 map_flags)
{
	if (l_old && map_flags == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!l_old && map_flags == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}

/* Called from syscall or from eBPF program */
static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
				u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
				!!l_old);
	if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);
		goto err;
	}

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		hlist_del_rcu(&l_old->hash_node);
		free_htab_elem(htab, l_old);
	}
	ret = 0;
err:
	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags,
					 bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
		u32 size = htab->map.value_size;

		/* per-cpu hash map can update value in-place */
		if (!onallcpus) {
			memcpy(this_cpu_ptr(pptr), value, size);
		} else {
			int off = 0, cpu;

			size = round_up(size, 8);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
						value + off, size);
				off += size;
			}
		}
	} else {
		l_new = alloc_htab_elem(htab, key, value, key_size,
					hash, true, onallcpus, false);
		if (IS_ERR(l_new)) {
			ret = PTR_ERR(l_new);
			goto err;
		}
		hlist_add_head_rcu(&l_new->hash_node, head);
	}
	ret = 0;
err:
	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags)
{
	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}

/* Called from syscall or from eBPF program */
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret = -ENOENT;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);
	b = __select_bucket(htab, hash);
	head = &b->head;

	raw_spin_lock_irqsave(&b->lock, flags);

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l) {
		hlist_del_rcu(&l->hash_node);
		free_htab_elem(htab, l);
		ret = 0;
	}

	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

static void delete_all_elements(struct bpf_htab *htab)
{
	int i;

	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_head *head = select_bucket(htab, i);
		struct hlist_node *n;
		struct htab_elem *l;

		hlist_for_each_entry_safe(l, n, head, hash_node) {
			hlist_del_rcu(&l->hash_node);
			htab_elem_free(htab, l);
		}
	}
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	synchronize_rcu();

	/* some of free_htab_elem() callbacks for elements of this map may
	 * not have executed. Wait for them.
	 */
	rcu_barrier();
	if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
		delete_all_elements(htab);
	} else {
		htab_free_elems(htab);
		pcpu_freelist_destroy(&htab->freelist);
	}
	free_percpu(htab->extra_elems);
	kvfree(htab->buckets);
	kfree(htab);
}

static const struct bpf_map_ops htab_ops = {
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_map_lookup_elem,
	.map_update_elem = htab_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_type __read_mostly = {
	.ops = &htab_ops,
	.type = BPF_MAP_TYPE_HASH,
};

/* Called from eBPF program */
static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	else
		return NULL;
}

int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int ret = -ENOENT;
	int cpu, off = 0;
	u32 size;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	l = __htab_map_lookup_elem(map, key);
	if (!l)
		goto out;
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
			   u64 map_flags)
{
	int ret;

	rcu_read_lock();
	ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true);
	rcu_read_unlock();

	return ret;
}

static const struct bpf_map_ops htab_percpu_ops = {
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_percpu_map_lookup_elem,
	.map_update_elem = htab_percpu_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_percpu_type __read_mostly = {
	.ops = &htab_percpu_ops,
	.type = BPF_MAP_TYPE_PERCPU_HASH,
};

static int __init register_htab_map(void)
{
	bpf_register_map_type(&htab_type);
	bpf_register_map_type(&htab_percpu_type);
	return 0;
}
late_initcall(register_htab_map);
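
/* Usage sketch (illustrative only, kept as a comment so the file stays valid
 * kernel C): user space reaches the map_ops above through the bpf(2) syscall.
 * A rough caller, assuming <unistd.h>, <sys/syscall.h> and <linux/bpf.h> are
 * available and using example sizes and names, might look like this.
 * map_flags == 0 selects the pre-allocated path set up by
 * prealloc_elems_and_freelist(); BPF_ANY ends up in check_flags() and
 * htab_map_update_elem() above.
 *
 *	union bpf_attr create = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 1024,
 *		.map_flags   = 0,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &create, sizeof(create));
 *
 *	__u32 key = 1;
 *	__u64 value = 42;
 *	union bpf_attr update = {
 *		.map_fd = map_fd,
 *		.key    = (__u64)(unsigned long)&key,
 *		.value  = (__u64)(unsigned long)&value,
 *		.flags  = BPF_ANY,
 *	};
 *	syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &update, sizeof(update));
 */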