1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016,2017 Facebook
4 */
5 #include <linux/bpf.h>
6 #include <linux/btf.h>
7 #include <linux/err.h>
8 #include <linux/slab.h>
9 #include <linux/mm.h>
10 #include <linux/filter.h>
11 #include <linux/perf_event.h>
12 #include <uapi/linux/btf.h>
13 #include <linux/rcupdate_trace.h>
14 #include <linux/btf_ids.h>
15
16 #include "map_in_map.h"
17
/* Every map_flags bit that array-type maps understand at creation time. */
#define ARRAY_CREATE_FLAG_MASK						\
	(BPF_F_ACCESS_MASK | BPF_F_NUMA_NODE | BPF_F_MMAPABLE |	\
	 BPF_F_INNER_MAP | BPF_F_PRESERVE_ELEMS)
21
bpf_array_free_percpu(struct bpf_array * array)22 static void bpf_array_free_percpu(struct bpf_array *array)
23 {
24 int i;
25
26 for (i = 0; i < array->map.max_entries; i++) {
27 free_percpu(array->pptrs[i]);
28 cond_resched();
29 }
30 }
31
bpf_array_alloc_percpu(struct bpf_array * array)32 static int bpf_array_alloc_percpu(struct bpf_array *array)
33 {
34 void __percpu *ptr;
35 int i;
36
37 for (i = 0; i < array->map.max_entries; i++) {
38 ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
39 GFP_USER | __GFP_NOWARN);
40 if (!ptr) {
41 bpf_array_free_percpu(array);
42 return -ENOMEM;
43 }
44 array->pptrs[i] = ptr;
45 cond_resched();
46 }
47
48 return 0;
49 }
50
51 /* Called from syscall */
array_map_alloc_check(union bpf_attr * attr)52 int array_map_alloc_check(union bpf_attr *attr)
53 {
54 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
55 int numa_node = bpf_map_attr_numa_node(attr);
56
57 /* check sanity of attributes */
58 if (attr->max_entries == 0 || attr->key_size != 4 ||
59 attr->value_size == 0 ||
60 attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
61 !bpf_map_flags_access_ok(attr->map_flags) ||
62 (percpu && numa_node != NUMA_NO_NODE))
63 return -EINVAL;
64
65 if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
66 attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
67 return -EINVAL;
68
69 if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
70 attr->map_flags & BPF_F_PRESERVE_ELEMS)
71 return -EINVAL;
72
73 /* avoid overflow on round_up(map->value_size) */
74 if (attr->value_size > INT_MAX)
75 return -E2BIG;
76 /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
77 if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
78 return -E2BIG;
79
80 return 0;
81 }
82
array_map_alloc(union bpf_attr * attr)83 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
84 {
85 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
86 int numa_node = bpf_map_attr_numa_node(attr);
87 u32 elem_size, index_mask, max_entries;
88 bool bypass_spec_v1 = bpf_bypass_spec_v1();
89 u64 array_size, mask64;
90 struct bpf_array *array;
91
92 elem_size = round_up(attr->value_size, 8);
93
94 max_entries = attr->max_entries;
95
96 /* On 32 bit archs roundup_pow_of_two() with max_entries that has
97 * upper most bit set in u32 space is undefined behavior due to
98 * resulting 1U << 32, so do it manually here in u64 space.
99 */
100 mask64 = fls_long(max_entries - 1);
101 mask64 = 1ULL << mask64;
102 mask64 -= 1;
103
104 index_mask = mask64;
105 if (!bypass_spec_v1) {
106 /* round up array size to nearest power of 2,
107 * since cpu will speculate within index_mask limits
108 */
109 max_entries = index_mask + 1;
110 /* Check for overflows. */
111 if (max_entries < attr->max_entries)
112 return ERR_PTR(-E2BIG);
113 }
114
115 array_size = sizeof(*array);
116 if (percpu) {
117 array_size += (u64) max_entries * sizeof(void *);
118 } else {
119 /* rely on vmalloc() to return page-aligned memory and
120 * ensure array->value is exactly page-aligned
121 */
122 if (attr->map_flags & BPF_F_MMAPABLE) {
123 array_size = PAGE_ALIGN(array_size);
124 array_size += PAGE_ALIGN((u64) max_entries * elem_size);
125 } else {
126 array_size += (u64) max_entries * elem_size;
127 }
128 }
129
130 /* allocate all map elements and zero-initialize them */
131 if (attr->map_flags & BPF_F_MMAPABLE) {
132 void *data;
133
134 /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
135 data = bpf_map_area_mmapable_alloc(array_size, numa_node);
136 if (!data)
137 return ERR_PTR(-ENOMEM);
138 array = data + PAGE_ALIGN(sizeof(struct bpf_array))
139 - offsetof(struct bpf_array, value);
140 } else {
141 array = bpf_map_area_alloc(array_size, numa_node);
142 }
143 if (!array)
144 return ERR_PTR(-ENOMEM);
145 array->index_mask = index_mask;
146 array->map.bypass_spec_v1 = bypass_spec_v1;
147
148 /* copy mandatory map attributes */
149 bpf_map_init_from_attr(&array->map, attr);
150 array->elem_size = elem_size;
151
152 if (percpu && bpf_array_alloc_percpu(array)) {
153 bpf_map_area_free(array);
154 return ERR_PTR(-ENOMEM);
155 }
156
157 return &array->map;
158 }
159
/* Address of slot @index inside @array's value area (no bounds check). */
static void *array_map_elem_ptr(struct bpf_array *array, u32 index)
{
	u64 offset = (u64)array->elem_size * index;

	return array->value + offset;
}
164
165 /* Called from syscall or from eBPF program */
array_map_lookup_elem(struct bpf_map * map,void * key)166 static void *array_map_lookup_elem(struct bpf_map *map, void *key)
167 {
168 struct bpf_array *array = container_of(map, struct bpf_array, map);
169 u32 index = *(u32 *)key;
170
171 if (unlikely(index >= array->map.max_entries))
172 return NULL;
173
174 return array->value + (u64)array->elem_size * (index & array->index_mask);
175 }
176
/* Store the address of the map's value area in @imm.
 *
 * Only single-entry maps are supported (-ENOTSUPP otherwise), and @off must
 * lie inside the value (-EINVAL otherwise). Returns 0 on success.
 */
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	array = container_of(map, struct bpf_array, map);
	*imm = (unsigned long)array->value;
	return 0;
}
190
/* Translate address @imm back into an offset within the map's value area.
 *
 * Only single-entry maps are supported (-ENOTSUPP otherwise). Returns 0 and
 * stores the offset in @off when @imm lies in [value, value + elem_size),
 * -ENOENT when it does not.
 */
static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 start = (unsigned long)array->value;
	u64 end = start + array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;

	if (imm >= start && imm < end) {
		*off = imm - start;
		return 0;
	}

	return -ENOENT;
}
206
207 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
array_map_gen_lookup(struct bpf_map * map,struct bpf_insn * insn_buf)208 static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
209 {
210 struct bpf_array *array = container_of(map, struct bpf_array, map);
211 struct bpf_insn *insn = insn_buf;
212 u32 elem_size = array->elem_size;
213 const int ret = BPF_REG_0;
214 const int map_ptr = BPF_REG_1;
215 const int index = BPF_REG_2;
216
217 if (map->map_flags & BPF_F_INNER_MAP)
218 return -EOPNOTSUPP;
219
220 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
221 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
222 if (!map->bypass_spec_v1) {
223 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
224 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
225 } else {
226 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
227 }
228
229 if (is_power_of_2(elem_size)) {
230 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
231 } else {
232 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
233 }
234 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
235 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
236 *insn++ = BPF_MOV64_IMM(ret, 0);
237 return insn - insn_buf;
238 }
239
240 /* Called from eBPF program */
percpu_array_map_lookup_elem(struct bpf_map * map,void * key)241 static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
242 {
243 struct bpf_array *array = container_of(map, struct bpf_array, map);
244 u32 index = *(u32 *)key;
245
246 if (unlikely(index >= array->map.max_entries))
247 return NULL;
248
249 return this_cpu_ptr(array->pptrs[index & array->index_mask]);
250 }
251
/* Return @cpu's copy of the value at the u32 index in @key.
 *
 * NULL is returned when @cpu is not below nr_cpu_ids or the index is not
 * below max_entries.
 */
static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;

	if (cpu >= nr_cpu_ids || unlikely(index >= array->map.max_entries))
		return NULL;

	pptr = array->pptrs[index & array->index_mask];
	return per_cpu_ptr(pptr, cpu);
}
265
bpf_percpu_array_copy(struct bpf_map * map,void * key,void * value)266 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
267 {
268 struct bpf_array *array = container_of(map, struct bpf_array, map);
269 u32 index = *(u32 *)key;
270 void __percpu *pptr;
271 int cpu, off = 0;
272 u32 size;
273
274 if (unlikely(index >= array->map.max_entries))
275 return -ENOENT;
276
277 /* per_cpu areas are zero-filled and bpf programs can only
278 * access 'value_size' of them, so copying rounded areas
279 * will not leak any kernel data
280 */
281 size = array->elem_size;
282 rcu_read_lock();
283 pptr = array->pptrs[index & array->index_mask];
284 for_each_possible_cpu(cpu) {
285 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
286 check_and_init_map_value(map, value + off);
287 off += size;
288 }
289 rcu_read_unlock();
290 return 0;
291 }
292
293 /* Called from syscall */
array_map_get_next_key(struct bpf_map * map,void * key,void * next_key)294 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
295 {
296 struct bpf_array *array = container_of(map, struct bpf_array, map);
297 u32 index = key ? *(u32 *)key : U32_MAX;
298 u32 *next = (u32 *)next_key;
299
300 if (index >= array->map.max_entries) {
301 *next = 0;
302 return 0;
303 }
304
305 if (index == array->map.max_entries - 1)
306 return -ENOENT;
307
308 *next = index + 1;
309 return 0;
310 }
311
312 /* Called from syscall or from eBPF program */
array_map_update_elem(struct bpf_map * map,void * key,void * value,u64 map_flags)313 static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
314 u64 map_flags)
315 {
316 struct bpf_array *array = container_of(map, struct bpf_array, map);
317 u32 index = *(u32 *)key;
318 char *val;
319
320 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
321 /* unknown flags */
322 return -EINVAL;
323
324 if (unlikely(index >= array->map.max_entries))
325 /* all elements were pre-allocated, cannot insert a new one */
326 return -E2BIG;
327
328 if (unlikely(map_flags & BPF_NOEXIST))
329 /* all elements already exist */
330 return -EEXIST;
331
332 if (unlikely((map_flags & BPF_F_LOCK) &&
333 !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
334 return -EINVAL;
335
336 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
337 val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
338 copy_map_value(map, val, value);
339 bpf_obj_free_fields(array->map.record, val);
340 } else {
341 val = array->value +
342 (u64)array->elem_size * (index & array->index_mask);
343 if (map_flags & BPF_F_LOCK)
344 copy_map_value_locked(map, val, value, false);
345 else
346 copy_map_value(map, val, value);
347 bpf_obj_free_fields(array->map.record, val);
348 }
349 return 0;
350 }
351
/* Overwrite every possible CPU's value for the index in @key.
 *
 * @value holds one value per possible CPU, elem_size bytes apart.
 * Returns 0 on success, -EINVAL for unknown flags, -E2BIG for an
 * out-of-range index and -EEXIST when @map_flags is BPF_NOEXIST.
 */
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	void *src = value;
	int cpu;

	/* unknown flags */
	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;

	/* all elements were pre-allocated, cannot insert a new one */
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	/* all elements already exist */
	if (unlikely(map_flags == BPF_NOEXIST))
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		void *dst = per_cpu_ptr(pptr, cpu);

		copy_map_value_long(map, dst, src);
		bpf_obj_free_fields(array->map.record, dst);
		src += array->elem_size;
	}
	rcu_read_unlock();

	return 0;
}
390
/* Called from syscall or from eBPF program.
 *
 * Always fails with -EINVAL; neither argument is looked at.
 */
static long array_map_delete_elem(struct bpf_map *map, void *key)
{
	const long err = -EINVAL;

	return err;
}
396
array_map_vmalloc_addr(struct bpf_array * array)397 static void *array_map_vmalloc_addr(struct bpf_array *array)
398 {
399 return (void *)round_down((unsigned long)array, PAGE_SIZE);
400 }
401
array_map_free_timers(struct bpf_map * map)402 static void array_map_free_timers(struct bpf_map *map)
403 {
404 struct bpf_array *array = container_of(map, struct bpf_array, map);
405 int i;
406
407 /* We don't reset or free fields other than timer on uref dropping to zero. */
408 if (!btf_record_has_field(map->record, BPF_TIMER))
409 return;
410
411 for (i = 0; i < array->map.max_entries; i++)
412 bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
413 }
414
415 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
array_map_free(struct bpf_map * map)416 static void array_map_free(struct bpf_map *map)
417 {
418 struct bpf_array *array = container_of(map, struct bpf_array, map);
419 int i;
420
421 if (!IS_ERR_OR_NULL(map->record)) {
422 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
423 for (i = 0; i < array->map.max_entries; i++) {
424 void __percpu *pptr = array->pptrs[i & array->index_mask];
425 int cpu;
426
427 for_each_possible_cpu(cpu) {
428 bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
429 cond_resched();
430 }
431 }
432 } else {
433 for (i = 0; i < array->map.max_entries; i++)
434 bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
435 }
436 }
437
438 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
439 bpf_array_free_percpu(array);
440
441 if (array->map.map_flags & BPF_F_MMAPABLE)
442 bpf_map_area_free(array_map_vmalloc_addr(array));
443 else
444 bpf_map_area_free(array);
445 }
446
array_map_seq_show_elem(struct bpf_map * map,void * key,struct seq_file * m)447 static void array_map_seq_show_elem(struct bpf_map *map, void *key,
448 struct seq_file *m)
449 {
450 void *value;
451
452 rcu_read_lock();
453
454 value = array_map_lookup_elem(map, key);
455 if (!value) {
456 rcu_read_unlock();
457 return;
458 }
459
460 if (map->btf_key_type_id)
461 seq_printf(m, "%u: ", *(u32 *)key);
462 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
463 seq_puts(m, "\n");
464
465 rcu_read_unlock();
466 }
467
percpu_array_map_seq_show_elem(struct bpf_map * map,void * key,struct seq_file * m)468 static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
469 struct seq_file *m)
470 {
471 struct bpf_array *array = container_of(map, struct bpf_array, map);
472 u32 index = *(u32 *)key;
473 void __percpu *pptr;
474 int cpu;
475
476 rcu_read_lock();
477
478 seq_printf(m, "%u: {\n", *(u32 *)key);
479 pptr = array->pptrs[index & array->index_mask];
480 for_each_possible_cpu(cpu) {
481 seq_printf(m, "\tcpu%d: ", cpu);
482 btf_type_seq_show(map->btf, map->btf_value_type_id,
483 per_cpu_ptr(pptr, cpu), m);
484 seq_puts(m, "\n");
485 }
486 seq_puts(m, "}\n");
487
488 rcu_read_unlock();
489 }
490
array_map_check_btf(const struct bpf_map * map,const struct btf * btf,const struct btf_type * key_type,const struct btf_type * value_type)491 static int array_map_check_btf(const struct bpf_map *map,
492 const struct btf *btf,
493 const struct btf_type *key_type,
494 const struct btf_type *value_type)
495 {
496 u32 int_data;
497
498 /* One exception for keyless BTF: .bss/.data/.rodata map */
499 if (btf_type_is_void(key_type)) {
500 if (map->map_type != BPF_MAP_TYPE_ARRAY ||
501 map->max_entries != 1)
502 return -EINVAL;
503
504 if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
505 return -EINVAL;
506
507 return 0;
508 }
509
510 if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
511 return -EINVAL;
512
513 int_data = *(u32 *)(key_type + 1);
514 /* bpf array can only take a u32 key. This check makes sure
515 * that the btf matches the attr used during map_create.
516 */
517 if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
518 return -EINVAL;
519
520 return 0;
521 }
522
array_map_mmap(struct bpf_map * map,struct vm_area_struct * vma)523 static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
524 {
525 struct bpf_array *array = container_of(map, struct bpf_array, map);
526 pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;
527
528 if (!(map->map_flags & BPF_F_MMAPABLE))
529 return -EINVAL;
530
531 if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
532 PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
533 return -EINVAL;
534
535 return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
536 vma->vm_pgoff + pgoff);
537 }
538
array_map_meta_equal(const struct bpf_map * meta0,const struct bpf_map * meta1)539 static bool array_map_meta_equal(const struct bpf_map *meta0,
540 const struct bpf_map *meta1)
541 {
542 if (!bpf_map_meta_equal(meta0, meta1))
543 return false;
544 return meta0->map_flags & BPF_F_INNER_MAP ? true :
545 meta0->max_entries == meta1->max_entries;
546 }
547
548 struct bpf_iter_seq_array_map_info {
549 struct bpf_map *map;
550 void *percpu_value_buf;
551 u32 index;
552 };
553
/* seq_file ->start: return the element at the iterator's current index.
 *
 * Returns NULL once the index has reached max_entries. For per-cpu arrays
 * the per-cpu pointer of the slot is returned, otherwise the value address.
 */
static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 slot;

	if (info->index >= map->max_entries)
		return NULL;

	if (!*pos)
		*pos = 1;

	slot = info->index & array->index_mask;
	if (!info->percpu_value_buf)
		return array_map_elem_ptr(array, slot);

	return array->pptrs[slot];
}
572
/* seq_file ->next: advance the position and index, return the new element.
 *
 * Returns NULL once the index has reached max_entries. For per-cpu arrays
 * the per-cpu pointer of the slot is returned, otherwise the value address.
 */
static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 slot;

	*pos += 1;
	info->index += 1;
	if (info->index >= map->max_entries)
		return NULL;

	slot = info->index & array->index_mask;
	if (!info->percpu_value_buf)
		return array_map_elem_ptr(array, slot);

	return array->pptrs[slot];
}
591
__bpf_array_map_seq_show(struct seq_file * seq,void * v)592 static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
593 {
594 struct bpf_iter_seq_array_map_info *info = seq->private;
595 struct bpf_iter__bpf_map_elem ctx = {};
596 struct bpf_map *map = info->map;
597 struct bpf_array *array = container_of(map, struct bpf_array, map);
598 struct bpf_iter_meta meta;
599 struct bpf_prog *prog;
600 int off = 0, cpu = 0;
601 void __percpu **pptr;
602 u32 size;
603
604 meta.seq = seq;
605 prog = bpf_iter_get_info(&meta, v == NULL);
606 if (!prog)
607 return 0;
608
609 ctx.meta = &meta;
610 ctx.map = info->map;
611 if (v) {
612 ctx.key = &info->index;
613
614 if (!info->percpu_value_buf) {
615 ctx.value = v;
616 } else {
617 pptr = v;
618 size = array->elem_size;
619 for_each_possible_cpu(cpu) {
620 copy_map_value_long(map, info->percpu_value_buf + off,
621 per_cpu_ptr(pptr, cpu));
622 check_and_init_map_value(map, info->percpu_value_buf + off);
623 off += size;
624 }
625 ctx.value = info->percpu_value_buf;
626 }
627 }
628
629 return bpf_iter_run_prog(prog, &ctx);
630 }
631
/* seq_file ->show: forward to __bpf_array_map_seq_show(). */
static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	int err = __bpf_array_map_seq_show(seq, v);

	return err;
}
636
bpf_array_map_seq_stop(struct seq_file * seq,void * v)637 static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
638 {
639 if (!v)
640 (void)__bpf_array_map_seq_show(seq, NULL);
641 }
642
bpf_iter_init_array_map(void * priv_data,struct bpf_iter_aux_info * aux)643 static int bpf_iter_init_array_map(void *priv_data,
644 struct bpf_iter_aux_info *aux)
645 {
646 struct bpf_iter_seq_array_map_info *seq_info = priv_data;
647 struct bpf_map *map = aux->map;
648 struct bpf_array *array = container_of(map, struct bpf_array, map);
649 void *value_buf;
650 u32 buf_size;
651
652 if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
653 buf_size = array->elem_size * num_possible_cpus();
654 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
655 if (!value_buf)
656 return -ENOMEM;
657
658 seq_info->percpu_value_buf = value_buf;
659 }
660
661 /* bpf_iter_attach_map() acquires a map uref, and the uref may be
662 * released before or in the middle of iterating map elements, so
663 * acquire an extra map uref for iterator.
664 */
665 bpf_map_inc_with_uref(map);
666 seq_info->map = map;
667 return 0;
668 }
669
bpf_iter_fini_array_map(void * priv_data)670 static void bpf_iter_fini_array_map(void *priv_data)
671 {
672 struct bpf_iter_seq_array_map_info *seq_info = priv_data;
673
674 bpf_map_put_with_uref(seq_info->map);
675 kfree(seq_info->percpu_value_buf);
676 }
677
678 static const struct seq_operations bpf_array_map_seq_ops = {
679 .start = bpf_array_map_seq_start,
680 .next = bpf_array_map_seq_next,
681 .stop = bpf_array_map_seq_stop,
682 .show = bpf_array_map_seq_show,
683 };
684
685 static const struct bpf_iter_seq_info iter_seq_info = {
686 .seq_ops = &bpf_array_map_seq_ops,
687 .init_seq_private = bpf_iter_init_array_map,
688 .fini_seq_private = bpf_iter_fini_array_map,
689 .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
690 };
691
/* Invoke @callback_fn on the elements of @map in index order.
 *
 * Iteration stops early as soon as the callback returns non-zero. Returns
 * the number of elements the callback was invoked on, or -EINVAL when
 * @flags is not 0. For per-cpu arrays the current CPU's values are used and
 * migration is disabled for the duration of the walk.
 */
static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				    void *callback_ctx, u64 flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	const bool is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	u32 num_elems = 0;
	u32 i;

	if (flags != 0)
		return -EINVAL;

	if (is_percpu)
		migrate_disable();

	for (i = 0; i < map->max_entries; i++) {
		u32 key = i;
		void *val;
		u64 ret;

		val = is_percpu ? this_cpu_ptr(array->pptrs[i]) :
				  array_map_elem_ptr(array, i);
		num_elems++;
		ret = callback_fn((u64)(long)map, (u64)(long)&key,
				  (u64)(long)val, (u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();

	return num_elems;
}
726
array_map_mem_usage(const struct bpf_map * map)727 static u64 array_map_mem_usage(const struct bpf_map *map)
728 {
729 struct bpf_array *array = container_of(map, struct bpf_array, map);
730 bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
731 u32 elem_size = array->elem_size;
732 u64 entries = map->max_entries;
733 u64 usage = sizeof(*array);
734
735 if (percpu) {
736 usage += entries * sizeof(void *);
737 usage += entries * elem_size * num_possible_cpus();
738 } else {
739 if (map->map_flags & BPF_F_MMAPABLE) {
740 usage = PAGE_ALIGN(usage);
741 usage += PAGE_ALIGN(entries * elem_size);
742 } else {
743 usage += entries * elem_size;
744 }
745 }
746 return usage;
747 }
748
749 BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
750 const struct bpf_map_ops array_map_ops = {
751 .map_meta_equal = array_map_meta_equal,
752 .map_alloc_check = array_map_alloc_check,
753 .map_alloc = array_map_alloc,
754 .map_free = array_map_free,
755 .map_get_next_key = array_map_get_next_key,
756 .map_release_uref = array_map_free_timers,
757 .map_lookup_elem = array_map_lookup_elem,
758 .map_update_elem = array_map_update_elem,
759 .map_delete_elem = array_map_delete_elem,
760 .map_gen_lookup = array_map_gen_lookup,
761 .map_direct_value_addr = array_map_direct_value_addr,
762 .map_direct_value_meta = array_map_direct_value_meta,
763 .map_mmap = array_map_mmap,
764 .map_seq_show_elem = array_map_seq_show_elem,
765 .map_check_btf = array_map_check_btf,
766 .map_lookup_batch = generic_map_lookup_batch,
767 .map_update_batch = generic_map_update_batch,
768 .map_set_for_each_callback_args = map_set_for_each_callback_args,
769 .map_for_each_callback = bpf_for_each_array_elem,
770 .map_mem_usage = array_map_mem_usage,
771 .map_btf_id = &array_map_btf_ids[0],
772 .iter_seq_info = &iter_seq_info,
773 };
774
775 const struct bpf_map_ops percpu_array_map_ops = {
776 .map_meta_equal = bpf_map_meta_equal,
777 .map_alloc_check = array_map_alloc_check,
778 .map_alloc = array_map_alloc,
779 .map_free = array_map_free,
780 .map_get_next_key = array_map_get_next_key,
781 .map_lookup_elem = percpu_array_map_lookup_elem,
782 .map_update_elem = array_map_update_elem,
783 .map_delete_elem = array_map_delete_elem,
784 .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
785 .map_seq_show_elem = percpu_array_map_seq_show_elem,
786 .map_check_btf = array_map_check_btf,
787 .map_lookup_batch = generic_map_lookup_batch,
788 .map_update_batch = generic_map_update_batch,
789 .map_set_for_each_callback_args = map_set_for_each_callback_args,
790 .map_for_each_callback = bpf_for_each_array_elem,
791 .map_mem_usage = array_map_mem_usage,
792 .map_btf_id = &array_map_btf_ids[0],
793 .iter_seq_info = &iter_seq_info,
794 };
795
fd_array_map_alloc_check(union bpf_attr * attr)796 static int fd_array_map_alloc_check(union bpf_attr *attr)
797 {
798 /* only file descriptors can be stored in this type of map */
799 if (attr->value_size != sizeof(u32))
800 return -EINVAL;
801 /* Program read-only/write-only not supported for special maps yet. */
802 if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
803 return -EINVAL;
804 return array_map_alloc_check(attr);
805 }
806
fd_array_map_free(struct bpf_map * map)807 static void fd_array_map_free(struct bpf_map *map)
808 {
809 struct bpf_array *array = container_of(map, struct bpf_array, map);
810 int i;
811
812 /* make sure it's empty */
813 for (i = 0; i < array->map.max_entries; i++)
814 BUG_ON(array->ptrs[i] != NULL);
815
816 bpf_map_area_free(array);
817 }
818
fd_array_map_lookup_elem(struct bpf_map * map,void * key)819 static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
820 {
821 return ERR_PTR(-EOPNOTSUPP);
822 }
823
824 /* only called from syscall */
bpf_fd_array_map_lookup_elem(struct bpf_map * map,void * key,u32 * value)825 int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
826 {
827 void **elem, *ptr;
828 int ret = 0;
829
830 if (!map->ops->map_fd_sys_lookup_elem)
831 return -ENOTSUPP;
832
833 rcu_read_lock();
834 elem = array_map_lookup_elem(map, key);
835 if (elem && (ptr = READ_ONCE(*elem)))
836 *value = map->ops->map_fd_sys_lookup_elem(ptr);
837 else
838 ret = -ENOENT;
839 rcu_read_unlock();
840
841 return ret;
842 }
843
844 /* only called from syscall */
bpf_fd_array_map_update_elem(struct bpf_map * map,struct file * map_file,void * key,void * value,u64 map_flags)845 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
846 void *key, void *value, u64 map_flags)
847 {
848 struct bpf_array *array = container_of(map, struct bpf_array, map);
849 void *new_ptr, *old_ptr;
850 u32 index = *(u32 *)key, ufd;
851
852 if (map_flags != BPF_ANY)
853 return -EINVAL;
854
855 if (index >= array->map.max_entries)
856 return -E2BIG;
857
858 ufd = *(u32 *)value;
859 new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
860 if (IS_ERR(new_ptr))
861 return PTR_ERR(new_ptr);
862
863 if (map->ops->map_poke_run) {
864 mutex_lock(&array->aux->poke_mutex);
865 old_ptr = xchg(array->ptrs + index, new_ptr);
866 map->ops->map_poke_run(map, index, old_ptr, new_ptr);
867 mutex_unlock(&array->aux->poke_mutex);
868 } else {
869 old_ptr = xchg(array->ptrs + index, new_ptr);
870 }
871
872 if (old_ptr)
873 map->ops->map_fd_put_ptr(map, old_ptr, true);
874 return 0;
875 }
876
/* Clear slot @key and release the object it held.
 *
 * @need_defer is passed through to the map's map_fd_put_ptr().
 * Returns 0 if an object was removed, -ENOENT if the slot was already
 * empty, and -E2BIG for an out-of-range index.
 */
static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void *previous;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (!map->ops->map_poke_run) {
		previous = xchg(array->ptrs + index, NULL);
	} else {
		/* swap the slot and run the poke callback under poke_mutex */
		mutex_lock(&array->aux->poke_mutex);
		previous = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, previous, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	}

	if (!previous)
		return -ENOENT;

	map->ops->map_fd_put_ptr(map, previous, need_defer);
	return 0;
}
902
fd_array_map_delete_elem(struct bpf_map * map,void * key)903 static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
904 {
905 return __fd_array_map_delete_elem(map, key, true);
906 }
907
prog_fd_array_get_ptr(struct bpf_map * map,struct file * map_file,int fd)908 static void *prog_fd_array_get_ptr(struct bpf_map *map,
909 struct file *map_file, int fd)
910 {
911 struct bpf_prog *prog = bpf_prog_get(fd);
912
913 if (IS_ERR(prog))
914 return prog;
915
916 if (!bpf_prog_map_compatible(map, prog)) {
917 bpf_prog_put(prog);
918 return ERR_PTR(-EINVAL);
919 }
920
921 return prog;
922 }
923
/* Drop the map's reference on the program @ptr; @need_defer is unused. */
static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
	struct bpf_prog *prog = ptr;

	/* bpf_prog is freed after one RCU or tasks trace grace period */
	bpf_prog_put(prog);
}
929
prog_fd_array_sys_lookup_elem(void * ptr)930 static u32 prog_fd_array_sys_lookup_elem(void *ptr)
931 {
932 return ((struct bpf_prog *)ptr)->aux->id;
933 }
934
935 /* decrement refcnt of all bpf_progs that are stored in this map */
bpf_fd_array_map_clear(struct bpf_map * map,bool need_defer)936 static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
937 {
938 struct bpf_array *array = container_of(map, struct bpf_array, map);
939 int i;
940
941 for (i = 0; i < array->map.max_entries; i++)
942 __fd_array_map_delete_elem(map, &i, need_defer);
943 }
944
prog_array_map_seq_show_elem(struct bpf_map * map,void * key,struct seq_file * m)945 static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
946 struct seq_file *m)
947 {
948 void **elem, *ptr;
949 u32 prog_id;
950
951 rcu_read_lock();
952
953 elem = array_map_lookup_elem(map, key);
954 if (elem) {
955 ptr = READ_ONCE(*elem);
956 if (ptr) {
957 seq_printf(m, "%u: ", *(u32 *)key);
958 prog_id = prog_fd_array_sys_lookup_elem(ptr);
959 btf_type_seq_show(map->btf, map->btf_value_type_id,
960 &prog_id, m);
961 seq_puts(m, "\n");
962 }
963 }
964
965 rcu_read_unlock();
966 }
967
968 struct prog_poke_elem {
969 struct list_head list;
970 struct bpf_prog_aux *aux;
971 };
972
prog_array_map_poke_track(struct bpf_map * map,struct bpf_prog_aux * prog_aux)973 static int prog_array_map_poke_track(struct bpf_map *map,
974 struct bpf_prog_aux *prog_aux)
975 {
976 struct prog_poke_elem *elem;
977 struct bpf_array_aux *aux;
978 int ret = 0;
979
980 aux = container_of(map, struct bpf_array, map)->aux;
981 mutex_lock(&aux->poke_mutex);
982 list_for_each_entry(elem, &aux->poke_progs, list) {
983 if (elem->aux == prog_aux)
984 goto out;
985 }
986
987 elem = kmalloc(sizeof(*elem), GFP_KERNEL);
988 if (!elem) {
989 ret = -ENOMEM;
990 goto out;
991 }
992
993 INIT_LIST_HEAD(&elem->list);
994 /* We must track the program's aux info at this point in time
995 * since the program pointer itself may not be stable yet, see
996 * also comment in prog_array_map_poke_run().
997 */
998 elem->aux = prog_aux;
999
1000 list_add_tail(&elem->list, &aux->poke_progs);
1001 out:
1002 mutex_unlock(&aux->poke_mutex);
1003 return ret;
1004 }
1005
prog_array_map_poke_untrack(struct bpf_map * map,struct bpf_prog_aux * prog_aux)1006 static void prog_array_map_poke_untrack(struct bpf_map *map,
1007 struct bpf_prog_aux *prog_aux)
1008 {
1009 struct prog_poke_elem *elem, *tmp;
1010 struct bpf_array_aux *aux;
1011
1012 aux = container_of(map, struct bpf_array, map)->aux;
1013 mutex_lock(&aux->poke_mutex);
1014 list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
1015 if (elem->aux == prog_aux) {
1016 list_del_init(&elem->list);
1017 kfree(elem);
1018 break;
1019 }
1020 }
1021 mutex_unlock(&aux->poke_mutex);
1022 }
1023
/* Weak default for the per-descriptor update hook called from
 * prog_array_map_poke_run(). This version does no patching; it only warns
 * (once) if it is ever reached.
 * NOTE(review): presumably architectures whose JIT emits poke descriptors
 * provide a strong definition -- confirm against the arch code.
 */
void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
				      struct bpf_prog *new, struct bpf_prog *old)
{
	WARN_ON_ONCE(1);
}
1029
/* .map_poke_run: slot @key of @map changes from @old to @new. Walk every
 * tracked program and call bpf_arch_poke_desc_update() for each of its
 * stable tail-call poke descriptors that refers to exactly this map and
 * key.
 *
 * The caller is expected to hold aux->poke_mutex; a one-time warning is
 * raised if the mutex is not locked.
 */
static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_array_aux *aux = array->aux;
	struct prog_poke_elem *elem;

	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_prog_aux *tracked = elem->aux;
		int i;

		for (i = 0; i < tracked->size_poke_tab; i++) {
			struct bpf_jit_poke_descriptor *poke = &tracked->poke_tab[i];

			/* Few things to be aware of:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry. We skip these as poke->tailcall_target_stable
			 *    is not active yet. The JIT will do the final fixup
			 *    before setting it stable. The various
			 *    poke->tailcall_target_stable are successively
			 *    activated, so tail call updates can arrive from here
			 *    while JIT is still finishing its final fixup for
			 *    non-activated poke entries.
			 * 3) Also programs reaching refcount of zero while patching
			 *    is in progress is okay since we're protected under
			 *    poke_mutex and untrack the programs before the JIT
			 *    buffer is freed.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable) ||
			    poke->reason != BPF_POKE_REASON_TAIL_CALL ||
			    poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			bpf_arch_poke_desc_update(poke, new, old);
		}
	}
}
1078
prog_array_map_clear_deferred(struct work_struct * work)1079 static void prog_array_map_clear_deferred(struct work_struct *work)
1080 {
1081 struct bpf_map *map = container_of(work, struct bpf_array_aux,
1082 work)->map;
1083 bpf_fd_array_map_clear(map, true);
1084 bpf_map_put(map);
1085 }
1086
prog_array_map_clear(struct bpf_map * map)1087 static void prog_array_map_clear(struct bpf_map *map)
1088 {
1089 struct bpf_array_aux *aux = container_of(map, struct bpf_array,
1090 map)->aux;
1091 bpf_map_inc(map);
1092 schedule_work(&aux->work);
1093 }
1094
prog_array_map_alloc(union bpf_attr * attr)1095 static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
1096 {
1097 struct bpf_array_aux *aux;
1098 struct bpf_map *map;
1099
1100 aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
1101 if (!aux)
1102 return ERR_PTR(-ENOMEM);
1103
1104 INIT_WORK(&aux->work, prog_array_map_clear_deferred);
1105 INIT_LIST_HEAD(&aux->poke_progs);
1106 mutex_init(&aux->poke_mutex);
1107
1108 map = array_map_alloc(attr);
1109 if (IS_ERR(map)) {
1110 kfree(aux);
1111 return map;
1112 }
1113
1114 container_of(map, struct bpf_array, map)->aux = aux;
1115 aux->map = map;
1116
1117 return map;
1118 }
1119
prog_array_map_free(struct bpf_map * map)1120 static void prog_array_map_free(struct bpf_map *map)
1121 {
1122 struct prog_poke_elem *elem, *tmp;
1123 struct bpf_array_aux *aux;
1124
1125 aux = container_of(map, struct bpf_array, map)->aux;
1126 list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
1127 list_del_init(&elem->list);
1128 kfree(elem);
1129 }
1130 kfree(aux);
1131 fd_array_map_free(map);
1132 }
1133
/* Map operations for prog arrays.
 *
 * prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	/* alloc/free also manage the bpf_array_aux attached to the array */
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	/* tracking and patching of programs whose poke descriptors
	 * reference this map
	 */
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	/* schedules a deferred clear of all slots */
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
1157
bpf_event_entry_gen(struct file * perf_file,struct file * map_file)1158 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
1159 struct file *map_file)
1160 {
1161 struct bpf_event_entry *ee;
1162
1163 ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
1164 if (ee) {
1165 ee->event = perf_file->private_data;
1166 ee->perf_file = perf_file;
1167 ee->map_file = map_file;
1168 }
1169
1170 return ee;
1171 }
1172
__bpf_event_entry_free(struct rcu_head * rcu)1173 static void __bpf_event_entry_free(struct rcu_head *rcu)
1174 {
1175 struct bpf_event_entry *ee;
1176
1177 ee = container_of(rcu, struct bpf_event_entry, rcu);
1178 fput(ee->perf_file);
1179 kfree(ee);
1180 }
1181
bpf_event_entry_free_rcu(struct bpf_event_entry * ee)1182 static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
1183 {
1184 call_rcu(&ee->rcu, __bpf_event_entry_free);
1185 }
1186
perf_event_fd_array_get_ptr(struct bpf_map * map,struct file * map_file,int fd)1187 static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
1188 struct file *map_file, int fd)
1189 {
1190 struct bpf_event_entry *ee;
1191 struct perf_event *event;
1192 struct file *perf_file;
1193 u64 value;
1194
1195 perf_file = perf_event_get(fd);
1196 if (IS_ERR(perf_file))
1197 return perf_file;
1198
1199 ee = ERR_PTR(-EOPNOTSUPP);
1200 event = perf_file->private_data;
1201 if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
1202 goto err_out;
1203
1204 ee = bpf_event_entry_gen(perf_file, map_file);
1205 if (ee)
1206 return ee;
1207 ee = ERR_PTR(-ENOMEM);
1208 err_out:
1209 fput(perf_file);
1210 return ee;
1211 }
1212
/* .map_fd_put_ptr for perf event arrays: release the bpf_event_entry
 * stored in a slot. @map and @need_defer are not used by this
 * implementation.
 */
static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
	struct bpf_event_entry *entry = ptr;

	/* bpf_perf_event is freed after one RCU grace period */
	bpf_event_entry_free_rcu(entry);
}
1218
perf_event_fd_array_release(struct bpf_map * map,struct file * map_file)1219 static void perf_event_fd_array_release(struct bpf_map *map,
1220 struct file *map_file)
1221 {
1222 struct bpf_array *array = container_of(map, struct bpf_array, map);
1223 struct bpf_event_entry *ee;
1224 int i;
1225
1226 if (map->map_flags & BPF_F_PRESERVE_ELEMS)
1227 return;
1228
1229 rcu_read_lock();
1230 for (i = 0; i < array->map.max_entries; i++) {
1231 ee = READ_ONCE(array->ptrs[i]);
1232 if (ee && ee->map_file == map_file)
1233 __fd_array_map_delete_elem(map, &i, true);
1234 }
1235 rcu_read_unlock();
1236 }
1237
perf_event_fd_array_map_free(struct bpf_map * map)1238 static void perf_event_fd_array_map_free(struct bpf_map *map)
1239 {
1240 if (map->map_flags & BPF_F_PRESERVE_ELEMS)
1241 bpf_fd_array_map_clear(map, false);
1242 fd_array_map_free(map);
1243 }
1244
/* Map operations for perf event arrays. Slots hold struct bpf_event_entry
 * objects built from perf event fds by perf_event_fd_array_get_ptr().
 */
const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	/* drops the entries tied to the released map file, unless the map
	 * has BPF_F_PRESERVE_ELEMS set
	 */
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
1260
1261 #ifdef CONFIG_CGROUPS
/* .map_fd_get_ptr for cgroup arrays: the slot value is whatever
 * cgroup_get_from_fd() returns for @fd. @map and @map_file are not used.
 */
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	struct cgroup *cgrp = cgroup_get_from_fd(fd);

	return cgrp;
}
1268
/* .map_fd_put_ptr for cgroup arrays: release the cgroup stored in a slot.
 * @map and @need_defer are not used by this implementation.
 */
static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
	struct cgroup *cgrp = ptr;

	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(cgrp);
}
1274
/* .map_free for cgroup arrays: delete every slot without deferral
 * (need_defer == false), then free the array itself.
 */
static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map, false);
	fd_array_map_free(map);
}
1280
/* Map operations for cgroup arrays. Slots hold cgroup pointers obtained
 * from fds by cgroup_fd_array_get_ptr().
 */
const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	/* clears all slots before freeing the array */
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
1295 #endif
1296
array_of_map_alloc(union bpf_attr * attr)1297 static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
1298 {
1299 struct bpf_map *map, *inner_map_meta;
1300
1301 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
1302 if (IS_ERR(inner_map_meta))
1303 return inner_map_meta;
1304
1305 map = array_map_alloc(attr);
1306 if (IS_ERR(map)) {
1307 bpf_map_meta_free(inner_map_meta);
1308 return map;
1309 }
1310
1311 map->inner_map_meta = inner_map_meta;
1312
1313 return map;
1314 }
1315
array_of_map_free(struct bpf_map * map)1316 static void array_of_map_free(struct bpf_map *map)
1317 {
1318 /* map->inner_map_meta is only accessed by syscall which
1319 * is protected by fdget/fdput.
1320 */
1321 bpf_map_meta_free(map->inner_map_meta);
1322 bpf_fd_array_map_clear(map, false);
1323 fd_array_map_free(map);
1324 }
1325
array_of_map_lookup_elem(struct bpf_map * map,void * key)1326 static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
1327 {
1328 struct bpf_map **inner_map = array_map_lookup_elem(map, key);
1329
1330 if (!inner_map)
1331 return NULL;
1332
1333 return READ_ONCE(*inner_map);
1334 }
1335
/* .map_gen_lookup for arrays of maps: write into @insn_buf a BPF
 * instruction sequence that performs the lookup.
 *
 * The sequence uses R1 as the map pointer (offsetof(struct bpf_array,
 * value) is added to it), R2 as the address of the 32-bit key, and leaves
 * the result in R0: the 64-bit value loaded from the slot, or 0 when the
 * index is >= max_entries or the slot holds 0.
 *
 * Returns the number of instructions emitted.
 */
static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = array->elem_size;
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	/* map_ptr += offsetof(value): now addresses the first slot */
	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	/* ret = *(u32 *)index, i.e. the key */
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		/* out of range: skip the next 6 insns, landing on "ret = 0" */
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		/* NOTE(review): index_mask presumably bounds the index under
		 * speculation (spec v1) -- confirm.
		 */
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		/* no AND is emitted, so the same target is only 5 insns away */
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	/* ret = index * elem_size; a shift is used when the size allows it */
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	/* ret = address of the slot */
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	/* ret = 64-bit value stored in the slot */
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	/* ret == 0: skip the JA below and fall into "ret = 0" */
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	/* ret != 0: skip "ret = 0" and leave with the loaded value */
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}
1366
/* Map operations for arrays of maps. Each slot holds a pointer to an inner
 * map; the outer map carries an inner_map_meta set up at allocation time.
 */
const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	/* emits the lookup as a BPF instruction sequence */
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
1384