xref: /openbmc/linux/kernel/bpf/helpers.c (revision 0ca8d3ca4561535f97b31e7b8de569c69bc3b27b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <linux/bpf.h>
5 #include <linux/rcupdate.h>
6 #include <linux/random.h>
7 #include <linux/smp.h>
8 #include <linux/topology.h>
9 #include <linux/ktime.h>
10 #include <linux/sched.h>
11 #include <linux/uidgid.h>
12 #include <linux/filter.h>
13 #include <linux/ctype.h>
14 #include <linux/jiffies.h>
15 #include <linux/pid_namespace.h>
16 #include <linux/proc_ns.h>
17 #include <linux/security.h>
18 
19 #include "../../lib/kstrtox.h"
20 
21 /* If kernel subsystem is allowing eBPF programs to call this function,
22  * inside its own verifier_ops->get_func_proto() callback it should return
23  * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
24  *
25  * Different map implementations will rely on rcu in map methods
26  * lookup/update/delete, therefore eBPF programs must run under rcu lock
27  * if program is allowed to access maps, so check rcu_read_lock_held in
28  * all three functions.
29  */
30 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
31 {
32 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
33 	return (unsigned long) map->ops->map_lookup_elem(map, key);
34 }
35 
36 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
37 	.func		= bpf_map_lookup_elem,
38 	.gpl_only	= false,
39 	.pkt_access	= true,
40 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
41 	.arg1_type	= ARG_CONST_MAP_PTR,
42 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
43 };
44 
45 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
46 	   void *, value, u64, flags)
47 {
48 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
49 	return map->ops->map_update_elem(map, key, value, flags);
50 }
51 
52 const struct bpf_func_proto bpf_map_update_elem_proto = {
53 	.func		= bpf_map_update_elem,
54 	.gpl_only	= false,
55 	.pkt_access	= true,
56 	.ret_type	= RET_INTEGER,
57 	.arg1_type	= ARG_CONST_MAP_PTR,
58 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
59 	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
60 	.arg4_type	= ARG_ANYTHING,
61 };
62 
63 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
64 {
65 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
66 	return map->ops->map_delete_elem(map, key);
67 }
68 
69 const struct bpf_func_proto bpf_map_delete_elem_proto = {
70 	.func		= bpf_map_delete_elem,
71 	.gpl_only	= false,
72 	.pkt_access	= true,
73 	.ret_type	= RET_INTEGER,
74 	.arg1_type	= ARG_CONST_MAP_PTR,
75 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
76 };
77 
78 BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
79 {
80 	return map->ops->map_push_elem(map, value, flags);
81 }
82 
83 const struct bpf_func_proto bpf_map_push_elem_proto = {
84 	.func		= bpf_map_push_elem,
85 	.gpl_only	= false,
86 	.pkt_access	= true,
87 	.ret_type	= RET_INTEGER,
88 	.arg1_type	= ARG_CONST_MAP_PTR,
89 	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
90 	.arg3_type	= ARG_ANYTHING,
91 };
92 
93 BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
94 {
95 	return map->ops->map_pop_elem(map, value);
96 }
97 
98 const struct bpf_func_proto bpf_map_pop_elem_proto = {
99 	.func		= bpf_map_pop_elem,
100 	.gpl_only	= false,
101 	.ret_type	= RET_INTEGER,
102 	.arg1_type	= ARG_CONST_MAP_PTR,
103 	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
104 };
105 
106 BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
107 {
108 	return map->ops->map_peek_elem(map, value);
109 }
110 
111 const struct bpf_func_proto bpf_map_peek_elem_proto = {
112 	.func		= bpf_map_peek_elem,
113 	.gpl_only	= false,
114 	.ret_type	= RET_INTEGER,
115 	.arg1_type	= ARG_CONST_MAP_PTR,
116 	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
117 };
118 
/* Helper prototype: takes no arguments and returns an integer.
 * The implementation, bpf_user_rnd_u32, is not defined in this part of
 * the file.
 */
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
124 
125 BPF_CALL_0(bpf_get_smp_processor_id)
126 {
127 	return smp_processor_id();
128 }
129 
130 const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
131 	.func		= bpf_get_smp_processor_id,
132 	.gpl_only	= false,
133 	.ret_type	= RET_INTEGER,
134 };
135 
136 BPF_CALL_0(bpf_get_numa_node_id)
137 {
138 	return numa_node_id();
139 }
140 
141 const struct bpf_func_proto bpf_get_numa_node_id_proto = {
142 	.func		= bpf_get_numa_node_id,
143 	.gpl_only	= false,
144 	.ret_type	= RET_INTEGER,
145 };
146 
147 BPF_CALL_0(bpf_ktime_get_ns)
148 {
149 	/* NMI safe access to clock monotonic */
150 	return ktime_get_mono_fast_ns();
151 }
152 
153 const struct bpf_func_proto bpf_ktime_get_ns_proto = {
154 	.func		= bpf_ktime_get_ns,
155 	.gpl_only	= false,
156 	.ret_type	= RET_INTEGER,
157 };
158 
159 BPF_CALL_0(bpf_ktime_get_boot_ns)
160 {
161 	/* NMI safe access to clock boottime */
162 	return ktime_get_boot_fast_ns();
163 }
164 
165 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
166 	.func		= bpf_ktime_get_boot_ns,
167 	.gpl_only	= false,
168 	.ret_type	= RET_INTEGER,
169 };
170 
171 BPF_CALL_0(bpf_ktime_get_coarse_ns)
172 {
173 	return ktime_get_coarse_ns();
174 }
175 
176 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
177 	.func		= bpf_ktime_get_coarse_ns,
178 	.gpl_only	= false,
179 	.ret_type	= RET_INTEGER,
180 };
181 
182 BPF_CALL_0(bpf_get_current_pid_tgid)
183 {
184 	struct task_struct *task = current;
185 
186 	if (unlikely(!task))
187 		return -EINVAL;
188 
189 	return (u64) task->tgid << 32 | task->pid;
190 }
191 
192 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
193 	.func		= bpf_get_current_pid_tgid,
194 	.gpl_only	= false,
195 	.ret_type	= RET_INTEGER,
196 };
197 
198 BPF_CALL_0(bpf_get_current_uid_gid)
199 {
200 	struct task_struct *task = current;
201 	kuid_t uid;
202 	kgid_t gid;
203 
204 	if (unlikely(!task))
205 		return -EINVAL;
206 
207 	current_uid_gid(&uid, &gid);
208 	return (u64) from_kgid(&init_user_ns, gid) << 32 |
209 		     from_kuid(&init_user_ns, uid);
210 }
211 
212 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
213 	.func		= bpf_get_current_uid_gid,
214 	.gpl_only	= false,
215 	.ret_type	= RET_INTEGER,
216 };
217 
218 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
219 {
220 	struct task_struct *task = current;
221 
222 	if (unlikely(!task))
223 		goto err_clear;
224 
225 	strncpy(buf, task->comm, size);
226 
227 	/* Verifier guarantees that size > 0. For task->comm exceeding
228 	 * size, guarantee that buf is %NUL-terminated. Unconditionally
229 	 * done here to save the size test.
230 	 */
231 	buf[size - 1] = 0;
232 	return 0;
233 err_clear:
234 	memset(buf, 0, size);
235 	return -EINVAL;
236 }
237 
238 const struct bpf_func_proto bpf_get_current_comm_proto = {
239 	.func		= bpf_get_current_comm,
240 	.gpl_only	= false,
241 	.ret_type	= RET_INTEGER,
242 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
243 	.arg2_type	= ARG_CONST_SIZE,
244 };
245 
246 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
247 
248 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
249 {
250 	arch_spinlock_t *l = (void *)lock;
251 	union {
252 		__u32 val;
253 		arch_spinlock_t lock;
254 	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
255 
256 	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
257 	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
258 	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
259 	arch_spin_lock(l);
260 }
261 
262 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
263 {
264 	arch_spinlock_t *l = (void *)lock;
265 
266 	arch_spin_unlock(l);
267 }
268 
269 #else
270 
271 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
272 {
273 	atomic_t *l = (void *)lock;
274 
275 	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
276 	do {
277 		atomic_cond_read_relaxed(l, !VAL);
278 	} while (atomic_xchg(l, 1));
279 }
280 
281 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
282 {
283 	atomic_t *l = (void *)lock;
284 
285 	atomic_set_release(l, 0);
286 }
287 
288 #endif
289 
290 static DEFINE_PER_CPU(unsigned long, irqsave_flags);
291 
292 static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
293 {
294 	unsigned long flags;
295 
296 	local_irq_save(flags);
297 	__bpf_spin_lock(lock);
298 	__this_cpu_write(irqsave_flags, flags);
299 }
300 
/* BPF helper: take @lock with local interrupts disabled. Always returns 0;
 * the prototype below declares the result as RET_VOID.
 */
notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_lock_irqsave(lock);
	return 0;
}

/* Prototype the verifier uses to type-check bpf_spin_lock() calls */
const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};
313 
314 static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
315 {
316 	unsigned long flags;
317 
318 	flags = __this_cpu_read(irqsave_flags);
319 	__bpf_spin_unlock(lock);
320 	local_irq_restore(flags);
321 }
322 
/* BPF helper: release @lock and restore the interrupt state saved when it
 * was taken. Always returns 0; the prototype below declares the result as
 * RET_VOID.
 */
notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_unlock_irqrestore(lock);
	return 0;
}

/* Prototype the verifier uses to type-check bpf_spin_unlock() calls */
const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};
335 
336 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
337 			   bool lock_src)
338 {
339 	struct bpf_spin_lock *lock;
340 
341 	if (lock_src)
342 		lock = src + map->spin_lock_off;
343 	else
344 		lock = dst + map->spin_lock_off;
345 	preempt_disable();
346 	__bpf_spin_lock_irqsave(lock);
347 	copy_map_value(map, dst, src);
348 	__bpf_spin_unlock_irqrestore(lock);
349 	preempt_enable();
350 }
351 
352 BPF_CALL_0(bpf_jiffies64)
353 {
354 	return get_jiffies_64();
355 }
356 
357 const struct bpf_func_proto bpf_jiffies64_proto = {
358 	.func		= bpf_jiffies64,
359 	.gpl_only	= false,
360 	.ret_type	= RET_INTEGER,
361 };
362 
363 #ifdef CONFIG_CGROUPS
364 BPF_CALL_0(bpf_get_current_cgroup_id)
365 {
366 	struct cgroup *cgrp = task_dfl_cgroup(current);
367 
368 	return cgroup_id(cgrp);
369 }
370 
371 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
372 	.func		= bpf_get_current_cgroup_id,
373 	.gpl_only	= false,
374 	.ret_type	= RET_INTEGER,
375 };
376 
377 BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
378 {
379 	struct cgroup *cgrp = task_dfl_cgroup(current);
380 	struct cgroup *ancestor;
381 
382 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
383 	if (!ancestor)
384 		return 0;
385 	return cgroup_id(ancestor);
386 }
387 
388 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
389 	.func		= bpf_get_current_ancestor_cgroup_id,
390 	.gpl_only	= false,
391 	.ret_type	= RET_INTEGER,
392 	.arg1_type	= ARG_ANYTHING,
393 };
394 
395 #ifdef CONFIG_CGROUP_BPF
396 
397 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
398 {
399 	/* flags argument is not used now,
400 	 * but provides an ability to extend the API.
401 	 * verifier checks that its value is correct.
402 	 */
403 	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
404 	struct bpf_cgroup_storage *storage;
405 	struct bpf_cg_run_ctx *ctx;
406 	void *ptr;
407 
408 	/* get current cgroup storage from BPF run context */
409 	ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
410 	storage = ctx->prog_item->cgroup_storage[stype];
411 
412 	if (stype == BPF_CGROUP_STORAGE_SHARED)
413 		ptr = &READ_ONCE(storage->buf)->data[0];
414 	else
415 		ptr = this_cpu_ptr(storage->percpu_buf);
416 
417 	return (unsigned long)ptr;
418 }
419 
420 const struct bpf_func_proto bpf_get_local_storage_proto = {
421 	.func		= bpf_get_local_storage,
422 	.gpl_only	= false,
423 	.ret_type	= RET_PTR_TO_MAP_VALUE,
424 	.arg1_type	= ARG_CONST_MAP_PTR,
425 	.arg2_type	= ARG_ANYTHING,
426 };
427 #endif
428 
429 #define BPF_STRTOX_BASE_MASK 0x1F
430 
431 static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
432 			  unsigned long long *res, bool *is_negative)
433 {
434 	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
435 	const char *cur_buf = buf;
436 	size_t cur_len = buf_len;
437 	unsigned int consumed;
438 	size_t val_len;
439 	char str[64];
440 
441 	if (!buf || !buf_len || !res || !is_negative)
442 		return -EINVAL;
443 
444 	if (base != 0 && base != 8 && base != 10 && base != 16)
445 		return -EINVAL;
446 
447 	if (flags & ~BPF_STRTOX_BASE_MASK)
448 		return -EINVAL;
449 
450 	while (cur_buf < buf + buf_len && isspace(*cur_buf))
451 		++cur_buf;
452 
453 	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
454 	if (*is_negative)
455 		++cur_buf;
456 
457 	consumed = cur_buf - buf;
458 	cur_len -= consumed;
459 	if (!cur_len)
460 		return -EINVAL;
461 
462 	cur_len = min(cur_len, sizeof(str) - 1);
463 	memcpy(str, cur_buf, cur_len);
464 	str[cur_len] = '\0';
465 	cur_buf = str;
466 
467 	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
468 	val_len = _parse_integer(cur_buf, base, res);
469 
470 	if (val_len & KSTRTOX_OVERFLOW)
471 		return -ERANGE;
472 
473 	if (val_len == 0)
474 		return -EINVAL;
475 
476 	cur_buf += val_len;
477 	consumed += cur_buf - str;
478 
479 	return consumed;
480 }
481 
482 static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
483 			 long long *res)
484 {
485 	unsigned long long _res;
486 	bool is_negative;
487 	int err;
488 
489 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
490 	if (err < 0)
491 		return err;
492 	if (is_negative) {
493 		if ((long long)-_res > 0)
494 			return -ERANGE;
495 		*res = -_res;
496 	} else {
497 		if ((long long)_res < 0)
498 			return -ERANGE;
499 		*res = _res;
500 	}
501 	return err;
502 }
503 
504 BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
505 	   long *, res)
506 {
507 	long long _res;
508 	int err;
509 
510 	err = __bpf_strtoll(buf, buf_len, flags, &_res);
511 	if (err < 0)
512 		return err;
513 	if (_res != (long)_res)
514 		return -ERANGE;
515 	*res = _res;
516 	return err;
517 }
518 
519 const struct bpf_func_proto bpf_strtol_proto = {
520 	.func		= bpf_strtol,
521 	.gpl_only	= false,
522 	.ret_type	= RET_INTEGER,
523 	.arg1_type	= ARG_PTR_TO_MEM,
524 	.arg2_type	= ARG_CONST_SIZE,
525 	.arg3_type	= ARG_ANYTHING,
526 	.arg4_type	= ARG_PTR_TO_LONG,
527 };
528 
529 BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
530 	   unsigned long *, res)
531 {
532 	unsigned long long _res;
533 	bool is_negative;
534 	int err;
535 
536 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
537 	if (err < 0)
538 		return err;
539 	if (is_negative)
540 		return -EINVAL;
541 	if (_res != (unsigned long)_res)
542 		return -ERANGE;
543 	*res = _res;
544 	return err;
545 }
546 
547 const struct bpf_func_proto bpf_strtoul_proto = {
548 	.func		= bpf_strtoul,
549 	.gpl_only	= false,
550 	.ret_type	= RET_INTEGER,
551 	.arg1_type	= ARG_PTR_TO_MEM,
552 	.arg2_type	= ARG_CONST_SIZE,
553 	.arg3_type	= ARG_ANYTHING,
554 	.arg4_type	= ARG_PTR_TO_LONG,
555 };
556 #endif
557 
558 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
559 	   struct bpf_pidns_info *, nsdata, u32, size)
560 {
561 	struct task_struct *task = current;
562 	struct pid_namespace *pidns;
563 	int err = -EINVAL;
564 
565 	if (unlikely(size != sizeof(struct bpf_pidns_info)))
566 		goto clear;
567 
568 	if (unlikely((u64)(dev_t)dev != dev))
569 		goto clear;
570 
571 	if (unlikely(!task))
572 		goto clear;
573 
574 	pidns = task_active_pid_ns(task);
575 	if (unlikely(!pidns)) {
576 		err = -ENOENT;
577 		goto clear;
578 	}
579 
580 	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
581 		goto clear;
582 
583 	nsdata->pid = task_pid_nr_ns(task, pidns);
584 	nsdata->tgid = task_tgid_nr_ns(task, pidns);
585 	return 0;
586 clear:
587 	memset((void *)nsdata, 0, (size_t) size);
588 	return err;
589 }
590 
591 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
592 	.func		= bpf_get_ns_current_pid_tgid,
593 	.gpl_only	= false,
594 	.ret_type	= RET_INTEGER,
595 	.arg1_type	= ARG_ANYTHING,
596 	.arg2_type	= ARG_ANYTHING,
597 	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
598 	.arg4_type      = ARG_CONST_SIZE,
599 };
600 
/* Helper prototype: takes no arguments and returns an integer.
 * The implementation, bpf_get_raw_cpu_id, is not defined in this part of
 * the file.
 */
static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
606 
607 BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
608 	   u64, flags, void *, data, u64, size)
609 {
610 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
611 		return -EINVAL;
612 
613 	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
614 }
615 
616 const struct bpf_func_proto bpf_event_output_data_proto =  {
617 	.func		= bpf_event_output_data,
618 	.gpl_only       = true,
619 	.ret_type       = RET_INTEGER,
620 	.arg1_type      = ARG_PTR_TO_CTX,
621 	.arg2_type      = ARG_CONST_MAP_PTR,
622 	.arg3_type      = ARG_ANYTHING,
623 	.arg4_type      = ARG_PTR_TO_MEM,
624 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
625 };
626 
627 BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
628 	   const void __user *, user_ptr)
629 {
630 	int ret = copy_from_user(dst, user_ptr, size);
631 
632 	if (unlikely(ret)) {
633 		memset(dst, 0, size);
634 		ret = -EFAULT;
635 	}
636 
637 	return ret;
638 }
639 
640 const struct bpf_func_proto bpf_copy_from_user_proto = {
641 	.func		= bpf_copy_from_user,
642 	.gpl_only	= false,
643 	.ret_type	= RET_INTEGER,
644 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
645 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
646 	.arg3_type	= ARG_ANYTHING,
647 };
648 
649 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
650 {
651 	if (cpu >= nr_cpu_ids)
652 		return (unsigned long)NULL;
653 
654 	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
655 }
656 
657 const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
658 	.func		= bpf_per_cpu_ptr,
659 	.gpl_only	= false,
660 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
661 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
662 	.arg2_type	= ARG_ANYTHING,
663 };
664 
665 BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
666 {
667 	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
668 }
669 
670 const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
671 	.func		= bpf_this_cpu_ptr,
672 	.gpl_only	= false,
673 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
674 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
675 };
676 
677 static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
678 		size_t bufsz)
679 {
680 	void __user *user_ptr = (__force void __user *)unsafe_ptr;
681 
682 	buf[0] = 0;
683 
684 	switch (fmt_ptype) {
685 	case 's':
686 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
687 		if ((unsigned long)unsafe_ptr < TASK_SIZE)
688 			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
689 		fallthrough;
690 #endif
691 	case 'k':
692 		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
693 	case 'u':
694 		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
695 	}
696 
697 	return -EINVAL;
698 }
699 
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
 * arguments representation.
 */
#define MAX_BPRINTF_BUF_LEN	512

/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL	3
/* One MAX_BPRINTF_BUF_LEN-byte scratch buffer per supported nesting level */
struct bpf_bprintf_buffers {
	char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
};
static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
/* Per-cpu nesting depth: incremented by try_get_fmt_tmp_buf() and
 * decremented by bpf_bprintf_cleanup().
 */
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
712 
713 static int try_get_fmt_tmp_buf(char **tmp_buf)
714 {
715 	struct bpf_bprintf_buffers *bufs;
716 	int nest_level;
717 
718 	preempt_disable();
719 	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
720 	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
721 		this_cpu_dec(bpf_bprintf_nest_level);
722 		preempt_enable();
723 		return -EBUSY;
724 	}
725 	bufs = this_cpu_ptr(&bpf_bprintf_bufs);
726 	*tmp_buf = bufs->tmp_bufs[nest_level - 1];
727 
728 	return 0;
729 }
730 
731 void bpf_bprintf_cleanup(void)
732 {
733 	if (this_cpu_read(bpf_bprintf_nest_level)) {
734 		this_cpu_dec(bpf_bprintf_nest_level);
735 		preempt_enable();
736 	}
737 }
738 
739 /*
740  * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
741  *
742  * Returns a negative value if fmt is an invalid format string or 0 otherwise.
743  *
744  * This can be used in two ways:
745  * - Format string verification only: when bin_args is NULL
746  * - Arguments preparation: in addition to the above verification, it writes in
747  *   bin_args a binary representation of arguments usable by bstr_printf where
748  *   pointers from BPF have been sanitized.
749  *
750  * In argument preparation mode, if 0 is returned, safe temporary buffers are
751  * allocated and bpf_bprintf_cleanup should be called to free them after use.
752  */
753 int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
754 			u32 **bin_args, u32 num_args)
755 {
756 	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
757 	size_t sizeof_cur_arg, sizeof_cur_ip;
758 	int err, i, num_spec = 0;
759 	u64 cur_arg;
760 	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
761 
762 	fmt_end = strnchr(fmt, fmt_size, 0);
763 	if (!fmt_end)
764 		return -EINVAL;
765 	fmt_size = fmt_end - fmt;
766 
767 	if (bin_args) {
768 		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
769 			return -EBUSY;
770 
771 		tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
772 		*bin_args = (u32 *)tmp_buf;
773 	}
774 
775 	for (i = 0; i < fmt_size; i++) {
776 		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
777 			err = -EINVAL;
778 			goto out;
779 		}
780 
781 		if (fmt[i] != '%')
782 			continue;
783 
784 		if (fmt[i + 1] == '%') {
785 			i++;
786 			continue;
787 		}
788 
789 		if (num_spec >= num_args) {
790 			err = -EINVAL;
791 			goto out;
792 		}
793 
794 		/* The string is zero-terminated so if fmt[i] != 0, we can
795 		 * always access fmt[i + 1], in the worst case it will be a 0
796 		 */
797 		i++;
798 
799 		/* skip optional "[0 +-][num]" width formatting field */
800 		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
801 		       fmt[i] == ' ')
802 			i++;
803 		if (fmt[i] >= '1' && fmt[i] <= '9') {
804 			i++;
805 			while (fmt[i] >= '0' && fmt[i] <= '9')
806 				i++;
807 		}
808 
809 		if (fmt[i] == 'p') {
810 			sizeof_cur_arg = sizeof(long);
811 
812 			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
813 			    fmt[i + 2] == 's') {
814 				fmt_ptype = fmt[i + 1];
815 				i += 2;
816 				goto fmt_str;
817 			}
818 
819 			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
820 			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
821 			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
822 			    fmt[i + 1] == 'S') {
823 				/* just kernel pointers */
824 				if (tmp_buf)
825 					cur_arg = raw_args[num_spec];
826 				i++;
827 				goto nocopy_fmt;
828 			}
829 
830 			if (fmt[i + 1] == 'B') {
831 				if (tmp_buf)  {
832 					err = snprintf(tmp_buf,
833 						       (tmp_buf_end - tmp_buf),
834 						       "%pB",
835 						       (void *)(long)raw_args[num_spec]);
836 					tmp_buf += (err + 1);
837 				}
838 
839 				i++;
840 				num_spec++;
841 				continue;
842 			}
843 
844 			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
845 			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
846 			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
847 				err = -EINVAL;
848 				goto out;
849 			}
850 
851 			i += 2;
852 			if (!tmp_buf)
853 				goto nocopy_fmt;
854 
855 			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
856 			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
857 				err = -ENOSPC;
858 				goto out;
859 			}
860 
861 			unsafe_ptr = (char *)(long)raw_args[num_spec];
862 			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
863 						       sizeof_cur_ip);
864 			if (err < 0)
865 				memset(cur_ip, 0, sizeof_cur_ip);
866 
867 			/* hack: bstr_printf expects IP addresses to be
868 			 * pre-formatted as strings, ironically, the easiest way
869 			 * to do that is to call snprintf.
870 			 */
871 			ip_spec[2] = fmt[i - 1];
872 			ip_spec[3] = fmt[i];
873 			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
874 				       ip_spec, &cur_ip);
875 
876 			tmp_buf += err + 1;
877 			num_spec++;
878 
879 			continue;
880 		} else if (fmt[i] == 's') {
881 			fmt_ptype = fmt[i];
882 fmt_str:
883 			if (fmt[i + 1] != 0 &&
884 			    !isspace(fmt[i + 1]) &&
885 			    !ispunct(fmt[i + 1])) {
886 				err = -EINVAL;
887 				goto out;
888 			}
889 
890 			if (!tmp_buf)
891 				goto nocopy_fmt;
892 
893 			if (tmp_buf_end == tmp_buf) {
894 				err = -ENOSPC;
895 				goto out;
896 			}
897 
898 			unsafe_ptr = (char *)(long)raw_args[num_spec];
899 			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
900 						    fmt_ptype,
901 						    tmp_buf_end - tmp_buf);
902 			if (err < 0) {
903 				tmp_buf[0] = '\0';
904 				err = 1;
905 			}
906 
907 			tmp_buf += err;
908 			num_spec++;
909 
910 			continue;
911 		}
912 
913 		sizeof_cur_arg = sizeof(int);
914 
915 		if (fmt[i] == 'l') {
916 			sizeof_cur_arg = sizeof(long);
917 			i++;
918 		}
919 		if (fmt[i] == 'l') {
920 			sizeof_cur_arg = sizeof(long long);
921 			i++;
922 		}
923 
924 		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
925 		    fmt[i] != 'x' && fmt[i] != 'X') {
926 			err = -EINVAL;
927 			goto out;
928 		}
929 
930 		if (tmp_buf)
931 			cur_arg = raw_args[num_spec];
932 nocopy_fmt:
933 		if (tmp_buf) {
934 			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
935 			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
936 				err = -ENOSPC;
937 				goto out;
938 			}
939 
940 			if (sizeof_cur_arg == 8) {
941 				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
942 				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
943 			} else {
944 				*(u32 *)tmp_buf = (u32)(long)cur_arg;
945 			}
946 			tmp_buf += sizeof_cur_arg;
947 		}
948 		num_spec++;
949 	}
950 
951 	err = 0;
952 out:
953 	if (err)
954 		bpf_bprintf_cleanup();
955 	return err;
956 }
957 
958 #define MAX_SNPRINTF_VARARGS		12
959 
960 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
961 	   const void *, data, u32, data_len)
962 {
963 	int err, num_args;
964 	u32 *bin_args;
965 
966 	if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
967 	    (data_len && !data))
968 		return -EINVAL;
969 	num_args = data_len / 8;
970 
971 	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
972 	 * can safely give an unbounded size.
973 	 */
974 	err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
975 	if (err < 0)
976 		return err;
977 
978 	err = bstr_printf(str, str_size, fmt, bin_args);
979 
980 	bpf_bprintf_cleanup();
981 
982 	return err + 1;
983 }
984 
985 const struct bpf_func_proto bpf_snprintf_proto = {
986 	.func		= bpf_snprintf,
987 	.gpl_only	= true,
988 	.ret_type	= RET_INTEGER,
989 	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
990 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
991 	.arg3_type	= ARG_PTR_TO_CONST_STR,
992 	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
993 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
994 };
995 
996 /* BPF map elements can contain 'struct bpf_timer'.
997  * Such map owns all of its BPF timers.
998  * 'struct bpf_timer' is allocated as part of map element allocation
999  * and it's zero initialized.
1000  * That space is used to keep 'struct bpf_timer_kern'.
1001  * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
1002  * remembers 'struct bpf_map *' pointer it's part of.
 * bpf_timer_set_callback() increments prog refcnt and assigns bpf callback_fn.
1004  * bpf_timer_start() arms the timer.
1005  * If user space reference to a map goes to zero at this point
1006  * ops->map_release_uref callback is responsible for cancelling the timers,
1007  * freeing their memory, and decrementing prog's refcnts.
1008  * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
1009  * Inner maps can contain bpf timers as well. ops->map_release_uref is
1010  * freeing the timers when inner map is replaced or deleted by user space.
1011  */
/* Kernel-side timer state allocated by bpf_timer_init() */
struct bpf_hrtimer {
	/* embedded hrtimer; bpf_timer_cb() gets back to this struct from it */
	struct hrtimer timer;
	/* map given to bpf_timer_init() */
	struct bpf_map *map;
	/* prog whose refcnt bpf_timer_set_callback() took; NULL until then */
	struct bpf_prog *prog;
	/* function bpf_timer_cb() invokes; NULL until a callback is set */
	void __rcu *callback_fn;
	/* timer address minus map->timer_off, as computed in bpf_timer_init() */
	void *value;
};
1019 
/* the actual struct hidden inside uapi struct bpf_timer */
struct bpf_timer_kern {
	/* NULL until bpf_timer_init() allocates the hrtimer state */
	struct bpf_hrtimer *timer;
	/* bpf_spin_lock is used here instead of spinlock_t to make
	 * sure that it always fits into space reserved by struct bpf_timer
	 * regardless of LOCKDEP and spinlock debug flags.
	 */
	struct bpf_spin_lock lock;
} __attribute__((aligned(8)));
1029 
1030 static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1031 
1032 static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1033 {
1034 	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1035 	struct bpf_map *map = t->map;
1036 	void *value = t->value;
1037 	void *callback_fn;
1038 	void *key;
1039 	u32 idx;
1040 
1041 	callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
1042 	if (!callback_fn)
1043 		goto out;
1044 
1045 	/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1046 	 * cannot be preempted by another bpf_timer_cb() on the same cpu.
1047 	 * Remember the timer this callback is servicing to prevent
1048 	 * deadlock if callback_fn() calls bpf_timer_cancel() or
1049 	 * bpf_map_delete_elem() on the same timer.
1050 	 */
1051 	this_cpu_write(hrtimer_running, t);
1052 	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1053 		struct bpf_array *array = container_of(map, struct bpf_array, map);
1054 
1055 		/* compute the key */
1056 		idx = ((char *)value - array->value) / array->elem_size;
1057 		key = &idx;
1058 	} else { /* hash or lru */
1059 		key = value - round_up(map->key_size, 8);
1060 	}
1061 
1062 	BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
1063 				   (u64)(long)value, 0, 0);
1064 	/* The verifier checked that return value is zero. */
1065 
1066 	this_cpu_write(hrtimer_running, NULL);
1067 out:
1068 	return HRTIMER_NORESTART;
1069 }
1070 
1071 BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
1072 	   u64, flags)
1073 {
1074 	clockid_t clockid = flags & (MAX_CLOCKS - 1);
1075 	struct bpf_hrtimer *t;
1076 	int ret = 0;
1077 
1078 	BUILD_BUG_ON(MAX_CLOCKS != 16);
1079 	BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
1080 	BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
1081 
1082 	if (in_nmi())
1083 		return -EOPNOTSUPP;
1084 
1085 	if (flags >= MAX_CLOCKS ||
1086 	    /* similar to timerfd except _ALARM variants are not supported */
1087 	    (clockid != CLOCK_MONOTONIC &&
1088 	     clockid != CLOCK_REALTIME &&
1089 	     clockid != CLOCK_BOOTTIME))
1090 		return -EINVAL;
1091 	__bpf_spin_lock_irqsave(&timer->lock);
1092 	t = timer->timer;
1093 	if (t) {
1094 		ret = -EBUSY;
1095 		goto out;
1096 	}
1097 	if (!atomic64_read(&map->usercnt)) {
1098 		/* maps with timers must be either held by user space
1099 		 * or pinned in bpffs.
1100 		 */
1101 		ret = -EPERM;
1102 		goto out;
1103 	}
1104 	/* allocate hrtimer via map_kmalloc to use memcg accounting */
1105 	t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
1106 	if (!t) {
1107 		ret = -ENOMEM;
1108 		goto out;
1109 	}
1110 	t->value = (void *)timer - map->timer_off;
1111 	t->map = map;
1112 	t->prog = NULL;
1113 	rcu_assign_pointer(t->callback_fn, NULL);
1114 	hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
1115 	t->timer.function = bpf_timer_cb;
1116 	timer->timer = t;
1117 out:
1118 	__bpf_spin_unlock_irqrestore(&timer->lock);
1119 	return ret;
1120 }
1121 
1122 static const struct bpf_func_proto bpf_timer_init_proto = {
1123 	.func		= bpf_timer_init,
1124 	.gpl_only	= true,
1125 	.ret_type	= RET_INTEGER,
1126 	.arg1_type	= ARG_PTR_TO_TIMER,
1127 	.arg2_type	= ARG_CONST_MAP_PTR,
1128 	.arg3_type	= ARG_ANYTHING,
1129 };
1130 
1131 BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
1132 	   struct bpf_prog_aux *, aux)
1133 {
1134 	struct bpf_prog *prev, *prog = aux->prog;
1135 	struct bpf_hrtimer *t;
1136 	int ret = 0;
1137 
1138 	if (in_nmi())
1139 		return -EOPNOTSUPP;
1140 	__bpf_spin_lock_irqsave(&timer->lock);
1141 	t = timer->timer;
1142 	if (!t) {
1143 		ret = -EINVAL;
1144 		goto out;
1145 	}
1146 	if (!atomic64_read(&t->map->usercnt)) {
1147 		/* maps with timers must be either held by user space
1148 		 * or pinned in bpffs. Otherwise timer might still be
1149 		 * running even when bpf prog is detached and user space
1150 		 * is gone, since map_release_uref won't ever be called.
1151 		 */
1152 		ret = -EPERM;
1153 		goto out;
1154 	}
1155 	prev = t->prog;
1156 	if (prev != prog) {
1157 		/* Bump prog refcnt once. Every bpf_timer_set_callback()
1158 		 * can pick different callback_fn-s within the same prog.
1159 		 */
1160 		prog = bpf_prog_inc_not_zero(prog);
1161 		if (IS_ERR(prog)) {
1162 			ret = PTR_ERR(prog);
1163 			goto out;
1164 		}
1165 		if (prev)
1166 			/* Drop prev prog refcnt when swapping with new prog */
1167 			bpf_prog_put(prev);
1168 		t->prog = prog;
1169 	}
1170 	rcu_assign_pointer(t->callback_fn, callback_fn);
1171 out:
1172 	__bpf_spin_unlock_irqrestore(&timer->lock);
1173 	return ret;
1174 }
1175 
1176 static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1177 	.func		= bpf_timer_set_callback,
1178 	.gpl_only	= true,
1179 	.ret_type	= RET_INTEGER,
1180 	.arg1_type	= ARG_PTR_TO_TIMER,
1181 	.arg2_type	= ARG_PTR_TO_FUNC,
1182 };
1183 
1184 BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
1185 {
1186 	struct bpf_hrtimer *t;
1187 	int ret = 0;
1188 
1189 	if (in_nmi())
1190 		return -EOPNOTSUPP;
1191 	if (flags)
1192 		return -EINVAL;
1193 	__bpf_spin_lock_irqsave(&timer->lock);
1194 	t = timer->timer;
1195 	if (!t || !t->prog) {
1196 		ret = -EINVAL;
1197 		goto out;
1198 	}
1199 	hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
1200 out:
1201 	__bpf_spin_unlock_irqrestore(&timer->lock);
1202 	return ret;
1203 }
1204 
1205 static const struct bpf_func_proto bpf_timer_start_proto = {
1206 	.func		= bpf_timer_start,
1207 	.gpl_only	= true,
1208 	.ret_type	= RET_INTEGER,
1209 	.arg1_type	= ARG_PTR_TO_TIMER,
1210 	.arg2_type	= ARG_ANYTHING,
1211 	.arg3_type	= ARG_ANYTHING,
1212 };
1213 
1214 static void drop_prog_refcnt(struct bpf_hrtimer *t)
1215 {
1216 	struct bpf_prog *prog = t->prog;
1217 
1218 	if (prog) {
1219 		bpf_prog_put(prog);
1220 		t->prog = NULL;
1221 		rcu_assign_pointer(t->callback_fn, NULL);
1222 	}
1223 }
1224 
1225 BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
1226 {
1227 	struct bpf_hrtimer *t;
1228 	int ret = 0;
1229 
1230 	if (in_nmi())
1231 		return -EOPNOTSUPP;
1232 	__bpf_spin_lock_irqsave(&timer->lock);
1233 	t = timer->timer;
1234 	if (!t) {
1235 		ret = -EINVAL;
1236 		goto out;
1237 	}
1238 	if (this_cpu_read(hrtimer_running) == t) {
1239 		/* If bpf callback_fn is trying to bpf_timer_cancel()
1240 		 * its own timer the hrtimer_cancel() will deadlock
1241 		 * since it waits for callback_fn to finish
1242 		 */
1243 		ret = -EDEADLK;
1244 		goto out;
1245 	}
1246 	drop_prog_refcnt(t);
1247 out:
1248 	__bpf_spin_unlock_irqrestore(&timer->lock);
1249 	/* Cancel the timer and wait for associated callback to finish
1250 	 * if it was running.
1251 	 */
1252 	ret = ret ?: hrtimer_cancel(&t->timer);
1253 	return ret;
1254 }
1255 
1256 static const struct bpf_func_proto bpf_timer_cancel_proto = {
1257 	.func		= bpf_timer_cancel,
1258 	.gpl_only	= true,
1259 	.ret_type	= RET_INTEGER,
1260 	.arg1_type	= ARG_PTR_TO_TIMER,
1261 };
1262 
1263 /* This function is called by map_delete/update_elem for individual element and
1264  * by ops->map_release_uref when the user space reference to a map reaches zero.
1265  */
1266 void bpf_timer_cancel_and_free(void *val)
1267 {
1268 	struct bpf_timer_kern *timer = val;
1269 	struct bpf_hrtimer *t;
1270 
1271 	/* Performance optimization: read timer->timer without lock first. */
1272 	if (!READ_ONCE(timer->timer))
1273 		return;
1274 
1275 	__bpf_spin_lock_irqsave(&timer->lock);
1276 	/* re-read it under lock */
1277 	t = timer->timer;
1278 	if (!t)
1279 		goto out;
1280 	drop_prog_refcnt(t);
1281 	/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1282 	 * this timer, since it won't be initialized.
1283 	 */
1284 	timer->timer = NULL;
1285 out:
1286 	__bpf_spin_unlock_irqrestore(&timer->lock);
1287 	if (!t)
1288 		return;
1289 	/* Cancel the timer and wait for callback to complete if it was running.
1290 	 * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
1291 	 * right after for both preallocated and non-preallocated maps.
1292 	 * The timer->timer = NULL was already done and no code path can
1293 	 * see address 't' anymore.
1294 	 *
1295 	 * Check that bpf_map_delete/update_elem() wasn't called from timer
1296 	 * callback_fn. In such case don't call hrtimer_cancel() (since it will
1297 	 * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1298 	 * return -1). Though callback_fn is still running on this cpu it's
1299 	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1300 	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
1301 	 * since timer->timer = NULL was already done. The timer will be
1302 	 * effectively cancelled because bpf_timer_cb() will return
1303 	 * HRTIMER_NORESTART.
1304 	 */
1305 	if (this_cpu_read(hrtimer_running) != t)
1306 		hrtimer_cancel(&t->timer);
1307 	kfree(t);
1308 }
1309 
1310 const struct bpf_func_proto bpf_get_current_task_proto __weak;
1311 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1312 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1313 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1314 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1315 
1316 const struct bpf_func_proto *
1317 bpf_base_func_proto(enum bpf_func_id func_id)
1318 {
1319 	switch (func_id) {
1320 	case BPF_FUNC_map_lookup_elem:
1321 		return &bpf_map_lookup_elem_proto;
1322 	case BPF_FUNC_map_update_elem:
1323 		return &bpf_map_update_elem_proto;
1324 	case BPF_FUNC_map_delete_elem:
1325 		return &bpf_map_delete_elem_proto;
1326 	case BPF_FUNC_map_push_elem:
1327 		return &bpf_map_push_elem_proto;
1328 	case BPF_FUNC_map_pop_elem:
1329 		return &bpf_map_pop_elem_proto;
1330 	case BPF_FUNC_map_peek_elem:
1331 		return &bpf_map_peek_elem_proto;
1332 	case BPF_FUNC_get_prandom_u32:
1333 		return &bpf_get_prandom_u32_proto;
1334 	case BPF_FUNC_get_smp_processor_id:
1335 		return &bpf_get_raw_smp_processor_id_proto;
1336 	case BPF_FUNC_get_numa_node_id:
1337 		return &bpf_get_numa_node_id_proto;
1338 	case BPF_FUNC_tail_call:
1339 		return &bpf_tail_call_proto;
1340 	case BPF_FUNC_ktime_get_ns:
1341 		return &bpf_ktime_get_ns_proto;
1342 	case BPF_FUNC_ktime_get_boot_ns:
1343 		return &bpf_ktime_get_boot_ns_proto;
1344 	case BPF_FUNC_ktime_get_coarse_ns:
1345 		return &bpf_ktime_get_coarse_ns_proto;
1346 	case BPF_FUNC_ringbuf_output:
1347 		return &bpf_ringbuf_output_proto;
1348 	case BPF_FUNC_ringbuf_reserve:
1349 		return &bpf_ringbuf_reserve_proto;
1350 	case BPF_FUNC_ringbuf_submit:
1351 		return &bpf_ringbuf_submit_proto;
1352 	case BPF_FUNC_ringbuf_discard:
1353 		return &bpf_ringbuf_discard_proto;
1354 	case BPF_FUNC_ringbuf_query:
1355 		return &bpf_ringbuf_query_proto;
1356 	case BPF_FUNC_for_each_map_elem:
1357 		return &bpf_for_each_map_elem_proto;
1358 	default:
1359 		break;
1360 	}
1361 
1362 	if (!bpf_capable())
1363 		return NULL;
1364 
1365 	switch (func_id) {
1366 	case BPF_FUNC_spin_lock:
1367 		return &bpf_spin_lock_proto;
1368 	case BPF_FUNC_spin_unlock:
1369 		return &bpf_spin_unlock_proto;
1370 	case BPF_FUNC_jiffies64:
1371 		return &bpf_jiffies64_proto;
1372 	case BPF_FUNC_per_cpu_ptr:
1373 		return &bpf_per_cpu_ptr_proto;
1374 	case BPF_FUNC_this_cpu_ptr:
1375 		return &bpf_this_cpu_ptr_proto;
1376 	case BPF_FUNC_timer_init:
1377 		return &bpf_timer_init_proto;
1378 	case BPF_FUNC_timer_set_callback:
1379 		return &bpf_timer_set_callback_proto;
1380 	case BPF_FUNC_timer_start:
1381 		return &bpf_timer_start_proto;
1382 	case BPF_FUNC_timer_cancel:
1383 		return &bpf_timer_cancel_proto;
1384 	default:
1385 		break;
1386 	}
1387 
1388 	if (!perfmon_capable())
1389 		return NULL;
1390 
1391 	switch (func_id) {
1392 	case BPF_FUNC_trace_printk:
1393 		return bpf_get_trace_printk_proto();
1394 	case BPF_FUNC_get_current_task:
1395 		return &bpf_get_current_task_proto;
1396 	case BPF_FUNC_probe_read_user:
1397 		return &bpf_probe_read_user_proto;
1398 	case BPF_FUNC_probe_read_kernel:
1399 		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
1400 		       NULL : &bpf_probe_read_kernel_proto;
1401 	case BPF_FUNC_probe_read_user_str:
1402 		return &bpf_probe_read_user_str_proto;
1403 	case BPF_FUNC_probe_read_kernel_str:
1404 		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
1405 		       NULL : &bpf_probe_read_kernel_str_proto;
1406 	case BPF_FUNC_snprintf_btf:
1407 		return &bpf_snprintf_btf_proto;
1408 	case BPF_FUNC_snprintf:
1409 		return &bpf_snprintf_proto;
1410 	default:
1411 		return NULL;
1412 	}
1413 }
1414