xref: /openbmc/linux/kernel/bpf/helpers.c (revision fa5d824ce5dd8306c66f45c34fd78536e6ce2488)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <linux/bpf.h>
5 #include <linux/bpf-cgroup.h>
6 #include <linux/rcupdate.h>
7 #include <linux/random.h>
8 #include <linux/smp.h>
9 #include <linux/topology.h>
10 #include <linux/ktime.h>
11 #include <linux/sched.h>
12 #include <linux/uidgid.h>
13 #include <linux/filter.h>
14 #include <linux/ctype.h>
15 #include <linux/jiffies.h>
16 #include <linux/pid_namespace.h>
17 #include <linux/proc_ns.h>
18 #include <linux/security.h>
19 #include <linux/btf_ids.h>
20 
21 #include "../../lib/kstrtox.h"
22 
23 /* If a kernel subsystem allows eBPF programs to call this function, it
24  * should return bpf_map_lookup_elem_proto from its own
25  * verifier_ops->get_func_proto() callback so that the verifier can properly check the arguments.
26  *
27  * Different map implementations rely on RCU in their lookup/update/delete
28  * map methods, therefore eBPF programs must run under an RCU lock if they
29  * are allowed to access maps; hence the rcu_read_lock_held() check in
30  * all three functions.
31  */
32 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
33 {
34 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
35 	return (unsigned long) map->ops->map_lookup_elem(map, key);
36 }
37 
38 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
39 	.func		= bpf_map_lookup_elem,
40 	.gpl_only	= false,
41 	.pkt_access	= true,
42 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
43 	.arg1_type	= ARG_CONST_MAP_PTR,
44 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
45 };
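/* Illustrative sketch (not part of this file): a subsystem's
 * verifier_ops->get_func_proto() callback, as described in the comment
 * above, might expose this helper and fall back to the generic set like
 * this. The callback name is hypothetical.
 *
 *	static const struct bpf_func_proto *
 *	example_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_map_lookup_elem:
 *			return &bpf_map_lookup_elem_proto;
 *		default:
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 */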
46 
47 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
48 	   void *, value, u64, flags)
49 {
50 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
51 	return map->ops->map_update_elem(map, key, value, flags);
52 }
53 
54 const struct bpf_func_proto bpf_map_update_elem_proto = {
55 	.func		= bpf_map_update_elem,
56 	.gpl_only	= false,
57 	.pkt_access	= true,
58 	.ret_type	= RET_INTEGER,
59 	.arg1_type	= ARG_CONST_MAP_PTR,
60 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
61 	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
62 	.arg4_type	= ARG_ANYTHING,
63 };
64 
65 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
66 {
67 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
68 	return map->ops->map_delete_elem(map, key);
69 }
70 
71 const struct bpf_func_proto bpf_map_delete_elem_proto = {
72 	.func		= bpf_map_delete_elem,
73 	.gpl_only	= false,
74 	.pkt_access	= true,
75 	.ret_type	= RET_INTEGER,
76 	.arg1_type	= ARG_CONST_MAP_PTR,
77 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
78 };
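/* Illustrative sketch (not part of this file): how a BPF program built
 * with libbpf's bpf_helpers.h typically drives the lookup/update/delete
 * helpers above. Map, section and variable names are hypothetical.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} exec_counts SEC(".maps");
 *
 *	SEC("tp/syscalls/sys_enter_execve")
 *	int count_execve(void *ctx)
 *	{
 *		__u32 pid = bpf_get_current_pid_tgid() >> 32;
 *		__u64 one = 1, *cnt;
 *
 *		cnt = bpf_map_lookup_elem(&exec_counts, &pid);
 *		if (cnt)
 *			__sync_fetch_and_add(cnt, 1);
 *		else
 *			bpf_map_update_elem(&exec_counts, &pid, &one, BPF_NOEXIST);
 *		return 0;
 *	}
 */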
79 
80 BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
81 {
82 	return map->ops->map_push_elem(map, value, flags);
83 }
84 
85 const struct bpf_func_proto bpf_map_push_elem_proto = {
86 	.func		= bpf_map_push_elem,
87 	.gpl_only	= false,
88 	.pkt_access	= true,
89 	.ret_type	= RET_INTEGER,
90 	.arg1_type	= ARG_CONST_MAP_PTR,
91 	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
92 	.arg3_type	= ARG_ANYTHING,
93 };
94 
95 BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
96 {
97 	return map->ops->map_pop_elem(map, value);
98 }
99 
100 const struct bpf_func_proto bpf_map_pop_elem_proto = {
101 	.func		= bpf_map_pop_elem,
102 	.gpl_only	= false,
103 	.ret_type	= RET_INTEGER,
104 	.arg1_type	= ARG_CONST_MAP_PTR,
105 	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
106 };
107 
108 BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
109 {
110 	return map->ops->map_peek_elem(map, value);
111 }
112 
113 const struct bpf_func_proto bpf_map_peek_elem_proto = {
114 	.func		= bpf_map_peek_elem,
115 	.gpl_only	= false,
116 	.ret_type	= RET_INTEGER,
117 	.arg1_type	= ARG_CONST_MAP_PTR,
118 	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
119 };
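/* Illustrative sketch (not part of this file): push/pop/peek are meant for
 * keyless BPF_MAP_TYPE_QUEUE/STACK maps. Assuming a queue map 'events'
 * with 8-byte values is declared elsewhere in the program, usage from the
 * BPF side could look like this (BPF_EXIST makes push overwrite the oldest
 * entry when the queue is full).
 *
 *	__u64 now = bpf_ktime_get_ns();
 *	__u64 head;
 *
 *	bpf_map_push_elem(&events, &now, BPF_EXIST);
 *	if (bpf_map_peek_elem(&events, &head) == 0)
 *		bpf_printk("head %llu", head);      // look at the head, keep it queued
 *	if (bpf_map_pop_elem(&events, &head) == 0)
 *		bpf_printk("popped %llu", head);    // remove the head
 */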
120 
121 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
122 	.func		= bpf_user_rnd_u32,
123 	.gpl_only	= false,
124 	.ret_type	= RET_INTEGER,
125 };
126 
127 BPF_CALL_0(bpf_get_smp_processor_id)
128 {
129 	return smp_processor_id();
130 }
131 
132 const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
133 	.func		= bpf_get_smp_processor_id,
134 	.gpl_only	= false,
135 	.ret_type	= RET_INTEGER,
136 };
137 
138 BPF_CALL_0(bpf_get_numa_node_id)
139 {
140 	return numa_node_id();
141 }
142 
143 const struct bpf_func_proto bpf_get_numa_node_id_proto = {
144 	.func		= bpf_get_numa_node_id,
145 	.gpl_only	= false,
146 	.ret_type	= RET_INTEGER,
147 };
148 
149 BPF_CALL_0(bpf_ktime_get_ns)
150 {
151 	/* NMI safe access to clock monotonic */
152 	return ktime_get_mono_fast_ns();
153 }
154 
155 const struct bpf_func_proto bpf_ktime_get_ns_proto = {
156 	.func		= bpf_ktime_get_ns,
157 	.gpl_only	= false,
158 	.ret_type	= RET_INTEGER,
159 };
160 
161 BPF_CALL_0(bpf_ktime_get_boot_ns)
162 {
163 	/* NMI safe access to clock boottime */
164 	return ktime_get_boot_fast_ns();
165 }
166 
167 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
168 	.func		= bpf_ktime_get_boot_ns,
169 	.gpl_only	= false,
170 	.ret_type	= RET_INTEGER,
171 };
172 
173 BPF_CALL_0(bpf_ktime_get_coarse_ns)
174 {
175 	return ktime_get_coarse_ns();
176 }
177 
178 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
179 	.func		= bpf_ktime_get_coarse_ns,
180 	.gpl_only	= false,
181 	.ret_type	= RET_INTEGER,
182 };
183 
184 BPF_CALL_0(bpf_get_current_pid_tgid)
185 {
186 	struct task_struct *task = current;
187 
188 	if (unlikely(!task))
189 		return -EINVAL;
190 
191 	return (u64) task->tgid << 32 | task->pid;
192 }
193 
194 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
195 	.func		= bpf_get_current_pid_tgid,
196 	.gpl_only	= false,
197 	.ret_type	= RET_INTEGER,
198 };
199 
200 BPF_CALL_0(bpf_get_current_uid_gid)
201 {
202 	struct task_struct *task = current;
203 	kuid_t uid;
204 	kgid_t gid;
205 
206 	if (unlikely(!task))
207 		return -EINVAL;
208 
209 	current_uid_gid(&uid, &gid);
210 	return (u64) from_kgid(&init_user_ns, gid) << 32 |
211 		     from_kuid(&init_user_ns, uid);
212 }
213 
214 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
215 	.func		= bpf_get_current_uid_gid,
216 	.gpl_only	= false,
217 	.ret_type	= RET_INTEGER,
218 };
219 
220 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
221 {
222 	struct task_struct *task = current;
223 
224 	if (unlikely(!task))
225 		goto err_clear;
226 
227 	strncpy(buf, task->comm, size);
228 
229 	/* Verifier guarantees that size > 0. For task->comm exceeding
230 	 * size, guarantee that buf is %NUL-terminated. Unconditionally
231 	 * done here to save the size test.
232 	 */
233 	buf[size - 1] = 0;
234 	return 0;
235 err_clear:
236 	memset(buf, 0, size);
237 	return -EINVAL;
238 }
239 
240 const struct bpf_func_proto bpf_get_current_comm_proto = {
241 	.func		= bpf_get_current_comm,
242 	.gpl_only	= false,
243 	.ret_type	= RET_INTEGER,
244 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
245 	.arg2_type	= ARG_CONST_SIZE,
246 };
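/* Illustrative sketch (not part of this file): how a tracing program might
 * combine the "current task" helpers above. Struct and section names are
 * hypothetical; the 16-byte buffer matches TASK_COMM_LEN.
 *
 *	struct exec_event {
 *		__u32 pid;
 *		__u32 uid;
 *		char comm[16];
 *	};
 *
 *	SEC("tp/sched/sched_process_exec")
 *	int on_exec(void *ctx)
 *	{
 *		struct exec_event e = {};
 *
 *		e.pid = bpf_get_current_pid_tgid() >> 32;   // tgid in the upper 32 bits
 *		e.uid = (__u32)bpf_get_current_uid_gid();   // uid in the lower 32 bits
 *		bpf_get_current_comm(e.comm, sizeof(e.comm));
 *		bpf_printk("exec: %s (pid %d)", e.comm, e.pid);
 *		return 0;
 *	}
 */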
247 
248 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
249 
250 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
251 {
252 	arch_spinlock_t *l = (void *)lock;
253 	union {
254 		__u32 val;
255 		arch_spinlock_t lock;
256 	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
257 
258 	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
259 	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
260 	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
261 	arch_spin_lock(l);
262 }
263 
264 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
265 {
266 	arch_spinlock_t *l = (void *)lock;
267 
268 	arch_spin_unlock(l);
269 }
270 
271 #else
272 
273 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
274 {
275 	atomic_t *l = (void *)lock;
276 
277 	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
278 	do {
279 		atomic_cond_read_relaxed(l, !VAL);
280 	} while (atomic_xchg(l, 1));
281 }
282 
283 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
284 {
285 	atomic_t *l = (void *)lock;
286 
287 	atomic_set_release(l, 0);
288 }
289 
290 #endif
291 
292 static DEFINE_PER_CPU(unsigned long, irqsave_flags);
293 
294 static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
295 {
296 	unsigned long flags;
297 
298 	local_irq_save(flags);
299 	__bpf_spin_lock(lock);
300 	__this_cpu_write(irqsave_flags, flags);
301 }
302 
303 notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
304 {
305 	__bpf_spin_lock_irqsave(lock);
306 	return 0;
307 }
308 
309 const struct bpf_func_proto bpf_spin_lock_proto = {
310 	.func		= bpf_spin_lock,
311 	.gpl_only	= false,
312 	.ret_type	= RET_VOID,
313 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
314 };
315 
316 static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
317 {
318 	unsigned long flags;
319 
320 	flags = __this_cpu_read(irqsave_flags);
321 	__bpf_spin_unlock(lock);
322 	local_irq_restore(flags);
323 }
324 
325 notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
326 {
327 	__bpf_spin_unlock_irqrestore(lock);
328 	return 0;
329 }
330 
331 const struct bpf_func_proto bpf_spin_unlock_proto = {
332 	.func		= bpf_spin_unlock,
333 	.gpl_only	= false,
334 	.ret_type	= RET_VOID,
335 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
336 };
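/* Illustrative sketch (not part of this file): on the BPF program side,
 * bpf_spin_lock()/bpf_spin_unlock() serialize access to the other fields
 * of one map value that embeds a struct bpf_spin_lock (the map must carry
 * BTF so the verifier can locate the lock). Names are hypothetical, and
 * 'counters' is assumed to be an array map of struct counter_val.
 *
 *	struct counter_val {
 *		struct bpf_spin_lock lock;
 *		__u64 packets;
 *		__u64 bytes;
 *	};
 *
 *	SEC("tc")
 *	int count(struct __sk_buff *skb)
 *	{
 *		__u32 key = 0;
 *		struct counter_val *val;
 *
 *		val = bpf_map_lookup_elem(&counters, &key);
 *		if (val) {
 *			bpf_spin_lock(&val->lock);
 *			val->packets++;
 *			val->bytes += skb->len;
 *			bpf_spin_unlock(&val->lock);
 *		}
 *		return 0;
 *	}
 */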
337 
338 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
339 			   bool lock_src)
340 {
341 	struct bpf_spin_lock *lock;
342 
343 	if (lock_src)
344 		lock = src + map->spin_lock_off;
345 	else
346 		lock = dst + map->spin_lock_off;
347 	preempt_disable();
348 	__bpf_spin_lock_irqsave(lock);
349 	copy_map_value(map, dst, src);
350 	__bpf_spin_unlock_irqrestore(lock);
351 	preempt_enable();
352 }
353 
354 BPF_CALL_0(bpf_jiffies64)
355 {
356 	return get_jiffies_64();
357 }
358 
359 const struct bpf_func_proto bpf_jiffies64_proto = {
360 	.func		= bpf_jiffies64,
361 	.gpl_only	= false,
362 	.ret_type	= RET_INTEGER,
363 };
364 
365 #ifdef CONFIG_CGROUPS
366 BPF_CALL_0(bpf_get_current_cgroup_id)
367 {
368 	struct cgroup *cgrp;
369 	u64 cgrp_id;
370 
371 	rcu_read_lock();
372 	cgrp = task_dfl_cgroup(current);
373 	cgrp_id = cgroup_id(cgrp);
374 	rcu_read_unlock();
375 
376 	return cgrp_id;
377 }
378 
379 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
380 	.func		= bpf_get_current_cgroup_id,
381 	.gpl_only	= false,
382 	.ret_type	= RET_INTEGER,
383 };
384 
385 BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
386 {
387 	struct cgroup *cgrp;
388 	struct cgroup *ancestor;
389 	u64 cgrp_id;
390 
391 	rcu_read_lock();
392 	cgrp = task_dfl_cgroup(current);
393 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
394 	cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
395 	rcu_read_unlock();
396 
397 	return cgrp_id;
398 }
399 
400 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
401 	.func		= bpf_get_current_ancestor_cgroup_id,
402 	.gpl_only	= false,
403 	.ret_type	= RET_INTEGER,
404 	.arg1_type	= ARG_ANYTHING,
405 };
406 
407 #ifdef CONFIG_CGROUP_BPF
408 
409 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
410 {
411 	/* The flags argument is not used now, but provides
412 	 * the ability to extend the API in the future.
413 	 * The verifier checks that its value is correct.
414 	 */
415 	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
416 	struct bpf_cgroup_storage *storage;
417 	struct bpf_cg_run_ctx *ctx;
418 	void *ptr;
419 
420 	/* get current cgroup storage from BPF run context */
421 	ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
422 	storage = ctx->prog_item->cgroup_storage[stype];
423 
424 	if (stype == BPF_CGROUP_STORAGE_SHARED)
425 		ptr = &READ_ONCE(storage->buf)->data[0];
426 	else
427 		ptr = this_cpu_ptr(storage->percpu_buf);
428 
429 	return (unsigned long)ptr;
430 }
431 
432 const struct bpf_func_proto bpf_get_local_storage_proto = {
433 	.func		= bpf_get_local_storage,
434 	.gpl_only	= false,
435 	.ret_type	= RET_PTR_TO_MAP_VALUE,
436 	.arg1_type	= ARG_CONST_MAP_PTR,
437 	.arg2_type	= ARG_ANYTHING,
438 };
439 #endif
440 
441 #define BPF_STRTOX_BASE_MASK 0x1F
442 
443 static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
444 			  unsigned long long *res, bool *is_negative)
445 {
446 	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
447 	const char *cur_buf = buf;
448 	size_t cur_len = buf_len;
449 	unsigned int consumed;
450 	size_t val_len;
451 	char str[64];
452 
453 	if (!buf || !buf_len || !res || !is_negative)
454 		return -EINVAL;
455 
456 	if (base != 0 && base != 8 && base != 10 && base != 16)
457 		return -EINVAL;
458 
459 	if (flags & ~BPF_STRTOX_BASE_MASK)
460 		return -EINVAL;
461 
462 	while (cur_buf < buf + buf_len && isspace(*cur_buf))
463 		++cur_buf;
464 
465 	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
466 	if (*is_negative)
467 		++cur_buf;
468 
469 	consumed = cur_buf - buf;
470 	cur_len -= consumed;
471 	if (!cur_len)
472 		return -EINVAL;
473 
474 	cur_len = min(cur_len, sizeof(str) - 1);
475 	memcpy(str, cur_buf, cur_len);
476 	str[cur_len] = '\0';
477 	cur_buf = str;
478 
479 	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
480 	val_len = _parse_integer(cur_buf, base, res);
481 
482 	if (val_len & KSTRTOX_OVERFLOW)
483 		return -ERANGE;
484 
485 	if (val_len == 0)
486 		return -EINVAL;
487 
488 	cur_buf += val_len;
489 	consumed += cur_buf - str;
490 
491 	return consumed;
492 }
493 
494 static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
495 			 long long *res)
496 {
497 	unsigned long long _res;
498 	bool is_negative;
499 	int err;
500 
501 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
502 	if (err < 0)
503 		return err;
504 	if (is_negative) {
505 		if ((long long)-_res > 0)
506 			return -ERANGE;
507 		*res = -_res;
508 	} else {
509 		if ((long long)_res < 0)
510 			return -ERANGE;
511 		*res = _res;
512 	}
513 	return err;
514 }
515 
516 BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
517 	   long *, res)
518 {
519 	long long _res;
520 	int err;
521 
522 	err = __bpf_strtoll(buf, buf_len, flags, &_res);
523 	if (err < 0)
524 		return err;
525 	if (_res != (long)_res)
526 		return -ERANGE;
527 	*res = _res;
528 	return err;
529 }
530 
531 const struct bpf_func_proto bpf_strtol_proto = {
532 	.func		= bpf_strtol,
533 	.gpl_only	= false,
534 	.ret_type	= RET_INTEGER,
535 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
536 	.arg2_type	= ARG_CONST_SIZE,
537 	.arg3_type	= ARG_ANYTHING,
538 	.arg4_type	= ARG_PTR_TO_LONG,
539 };
540 
541 BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
542 	   unsigned long *, res)
543 {
544 	unsigned long long _res;
545 	bool is_negative;
546 	int err;
547 
548 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
549 	if (err < 0)
550 		return err;
551 	if (is_negative)
552 		return -EINVAL;
553 	if (_res != (unsigned long)_res)
554 		return -ERANGE;
555 	*res = _res;
556 	return err;
557 }
558 
559 const struct bpf_func_proto bpf_strtoul_proto = {
560 	.func		= bpf_strtoul,
561 	.gpl_only	= false,
562 	.ret_type	= RET_INTEGER,
563 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
564 	.arg2_type	= ARG_CONST_SIZE,
565 	.arg3_type	= ARG_ANYTHING,
566 	.arg4_type	= ARG_PTR_TO_LONG,
567 };
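/* Illustrative sketch (not part of this file): bpf_strtol()/bpf_strtoul()
 * parse a number out of a buffer the program already holds, returning the
 * number of bytes consumed or a negative error. A cgroup sysctl program
 * might use it roughly like this; buffer handling is simplified and
 * bpf_sysctl_get_new_value() is only available to that program type.
 *
 *	SEC("cgroup/sysctl")
 *	int check_sysctl(struct bpf_sysctl *ctx)
 *	{
 *		char buf[16] = {};
 *		long val;
 *
 *		if (bpf_sysctl_get_new_value(ctx, buf, sizeof(buf)) < 0)
 *			return 1;                       // not a write, allow
 *		if (bpf_strtol(buf, sizeof(buf), 0, &val) < 0)
 *			return 0;                       // reject unparsable value
 *		return val <= 4096 ? 1 : 0;             // cap the new value
 *	}
 */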
568 #endif
569 
570 BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
571 {
572 	return strncmp(s1, s2, s1_sz);
573 }
574 
575 const struct bpf_func_proto bpf_strncmp_proto = {
576 	.func		= bpf_strncmp,
577 	.gpl_only	= false,
578 	.ret_type	= RET_INTEGER,
579 	.arg1_type	= ARG_PTR_TO_MEM,
580 	.arg2_type	= ARG_CONST_SIZE,
581 	.arg3_type	= ARG_PTR_TO_CONST_STR,
582 };
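/* Illustrative sketch (not part of this file): bpf_strncmp() compares at
 * most s1_sz bytes of s1 with s2, and the verifier requires s2 to be a
 * constant, NUL-terminated string (ARG_PTR_TO_CONST_STR above). A typical
 * use is filtering by task name in a BPF program:
 *
 *	char comm[16];
 *
 *	bpf_get_current_comm(comm, sizeof(comm));
 *	if (bpf_strncmp(comm, sizeof(comm), "systemd") == 0)
 *		return 0;       // ignore events from this task
 */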
583 
584 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
585 	   struct bpf_pidns_info *, nsdata, u32, size)
586 {
587 	struct task_struct *task = current;
588 	struct pid_namespace *pidns;
589 	int err = -EINVAL;
590 
591 	if (unlikely(size != sizeof(struct bpf_pidns_info)))
592 		goto clear;
593 
594 	if (unlikely((u64)(dev_t)dev != dev))
595 		goto clear;
596 
597 	if (unlikely(!task))
598 		goto clear;
599 
600 	pidns = task_active_pid_ns(task);
601 	if (unlikely(!pidns)) {
602 		err = -ENOENT;
603 		goto clear;
604 	}
605 
606 	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
607 		goto clear;
608 
609 	nsdata->pid = task_pid_nr_ns(task, pidns);
610 	nsdata->tgid = task_tgid_nr_ns(task, pidns);
611 	return 0;
612 clear:
613 	memset((void *)nsdata, 0, (size_t) size);
614 	return err;
615 }
616 
617 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
618 	.func		= bpf_get_ns_current_pid_tgid,
619 	.gpl_only	= false,
620 	.ret_type	= RET_INTEGER,
621 	.arg1_type	= ARG_ANYTHING,
622 	.arg2_type	= ARG_ANYTHING,
623 	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
624 	.arg4_type      = ARG_CONST_SIZE,
625 };
626 
627 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
628 	.func		= bpf_get_raw_cpu_id,
629 	.gpl_only	= false,
630 	.ret_type	= RET_INTEGER,
631 };
632 
633 BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
634 	   u64, flags, void *, data, u64, size)
635 {
636 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
637 		return -EINVAL;
638 
639 	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
640 }
641 
642 const struct bpf_func_proto bpf_event_output_data_proto =  {
643 	.func		= bpf_event_output_data,
644 	.gpl_only       = true,
645 	.ret_type       = RET_INTEGER,
646 	.arg1_type      = ARG_PTR_TO_CTX,
647 	.arg2_type      = ARG_CONST_MAP_PTR,
648 	.arg3_type      = ARG_ANYTHING,
649 	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
650 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
651 };
652 
653 BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
654 	   const void __user *, user_ptr)
655 {
656 	int ret = copy_from_user(dst, user_ptr, size);
657 
658 	if (unlikely(ret)) {
659 		memset(dst, 0, size);
660 		ret = -EFAULT;
661 	}
662 
663 	return ret;
664 }
665 
666 const struct bpf_func_proto bpf_copy_from_user_proto = {
667 	.func		= bpf_copy_from_user,
668 	.gpl_only	= false,
669 	.ret_type	= RET_INTEGER,
670 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
671 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
672 	.arg3_type	= ARG_ANYTHING,
673 };
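/* Illustrative sketch (not part of this file): bpf_copy_from_user() may
 * fault and sleep, so it is only usable from sleepable programs (e.g.
 * fentry.s/lsm.s sections loaded with BPF_F_SLEEPABLE). A hypothetical
 * use, copying a user-supplied string whose pointer was captured earlier
 * into 'user_path_ptr':
 *
 *	char path[64] = {};
 *
 *	if (bpf_copy_from_user(path, sizeof(path), user_path_ptr))
 *		return 0;       // fault: the helper already zeroed 'path'
 *	bpf_printk("path prefix: %s", path);
 */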
674 
675 BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
676 	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
677 {
678 	int ret;
679 
680 	/* flags is not used yet */
681 	if (unlikely(flags))
682 		return -EINVAL;
683 
684 	if (unlikely(!size))
685 		return 0;
686 
687 	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
688 	if (ret == size)
689 		return 0;
690 
691 	memset(dst, 0, size);
692 	/* Return -EFAULT for partial read */
693 	return ret < 0 ? ret : -EFAULT;
694 }
695 
696 const struct bpf_func_proto bpf_copy_from_user_task_proto = {
697 	.func		= bpf_copy_from_user_task,
698 	.gpl_only	= true,
699 	.ret_type	= RET_INTEGER,
700 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
701 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
702 	.arg3_type	= ARG_ANYTHING,
703 	.arg4_type	= ARG_PTR_TO_BTF_ID,
704 	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
705 	.arg5_type	= ARG_ANYTHING
706 };
707 
708 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
709 {
710 	if (cpu >= nr_cpu_ids)
711 		return (unsigned long)NULL;
712 
713 	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
714 }
715 
716 const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
717 	.func		= bpf_per_cpu_ptr,
718 	.gpl_only	= false,
719 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
720 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
721 	.arg2_type	= ARG_ANYTHING,
722 };
723 
724 BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
725 {
726 	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
727 }
728 
729 const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
730 	.func		= bpf_this_cpu_ptr,
731 	.gpl_only	= false,
732 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
733 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
734 };
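/* Illustrative sketch (not part of this file): from a BPF program these
 * helpers take a pointer to a per-CPU kernel variable declared as a typed
 * __ksym, similar to the BPF selftests' use of 'runqueues'. The return
 * value of bpf_per_cpu_ptr() may be NULL and must be checked.
 *
 *	extern const struct rq runqueues __ksym;
 *
 *	SEC("raw_tp/sys_enter")
 *	int dump_cpu0(void *ctx)
 *	{
 *		struct rq *rq;
 *
 *		rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
 *		if (rq)
 *			bpf_printk("cpu0 nr_running=%u", rq->nr_running);
 *		return 0;
 *	}
 */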
735 
736 static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
737 		size_t bufsz)
738 {
739 	void __user *user_ptr = (__force void __user *)unsafe_ptr;
740 
741 	buf[0] = 0;
742 
743 	switch (fmt_ptype) {
744 	case 's':
745 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
746 		if ((unsigned long)unsafe_ptr < TASK_SIZE)
747 			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
748 		fallthrough;
749 #endif
750 	case 'k':
751 		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
752 	case 'u':
753 		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
754 	}
755 
756 	return -EINVAL;
757 }
758 
759 /* Per-cpu temp buffers used by printf-like helpers to store the binary
760  * representation of the bprintf arguments.
761  */
762 #define MAX_BPRINTF_BUF_LEN	512
763 
764 /* Support executing three nested bprintf helper calls on a given CPU */
765 #define MAX_BPRINTF_NEST_LEVEL	3
766 struct bpf_bprintf_buffers {
767 	char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
768 };
769 static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
770 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
771 
772 static int try_get_fmt_tmp_buf(char **tmp_buf)
773 {
774 	struct bpf_bprintf_buffers *bufs;
775 	int nest_level;
776 
777 	preempt_disable();
778 	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
779 	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
780 		this_cpu_dec(bpf_bprintf_nest_level);
781 		preempt_enable();
782 		return -EBUSY;
783 	}
784 	bufs = this_cpu_ptr(&bpf_bprintf_bufs);
785 	*tmp_buf = bufs->tmp_bufs[nest_level - 1];
786 
787 	return 0;
788 }
789 
790 void bpf_bprintf_cleanup(void)
791 {
792 	if (this_cpu_read(bpf_bprintf_nest_level)) {
793 		this_cpu_dec(bpf_bprintf_nest_level);
794 		preempt_enable();
795 	}
796 }
797 
798 /*
799  * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
800  *
801  * Returns a negative value if fmt is an invalid format string or 0 otherwise.
802  *
803  * This can be used in two ways:
804  * - Format string verification only: when bin_args is NULL
805  * - Arguments preparation: in addition to the above verification, it writes in
806  *   bin_args a binary representation of arguments usable by bstr_printf where
807  *   pointers from BPF have been sanitized.
808  *
809  * In argument preparation mode, if 0 is returned, safe temporary buffers are
810  * allocated and bpf_bprintf_cleanup should be called to free them after use.
811  */
812 int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
813 			u32 **bin_args, u32 num_args)
814 {
815 	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
816 	size_t sizeof_cur_arg, sizeof_cur_ip;
817 	int err, i, num_spec = 0;
818 	u64 cur_arg;
819 	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
820 
821 	fmt_end = strnchr(fmt, fmt_size, 0);
822 	if (!fmt_end)
823 		return -EINVAL;
824 	fmt_size = fmt_end - fmt;
825 
826 	if (bin_args) {
827 		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
828 			return -EBUSY;
829 
830 		tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
831 		*bin_args = (u32 *)tmp_buf;
832 	}
833 
834 	for (i = 0; i < fmt_size; i++) {
835 		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
836 			err = -EINVAL;
837 			goto out;
838 		}
839 
840 		if (fmt[i] != '%')
841 			continue;
842 
843 		if (fmt[i + 1] == '%') {
844 			i++;
845 			continue;
846 		}
847 
848 		if (num_spec >= num_args) {
849 			err = -EINVAL;
850 			goto out;
851 		}
852 
853 		/* The string is zero-terminated, so if fmt[i] != 0 we can
854 		 * always access fmt[i + 1]; in the worst case it will be a 0.
855 		 */
856 		i++;
857 
858 		/* skip optional "[0 +-][num]" width formatting field */
859 		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
860 		       fmt[i] == ' ')
861 			i++;
862 		if (fmt[i] >= '1' && fmt[i] <= '9') {
863 			i++;
864 			while (fmt[i] >= '0' && fmt[i] <= '9')
865 				i++;
866 		}
867 
868 		if (fmt[i] == 'p') {
869 			sizeof_cur_arg = sizeof(long);
870 
871 			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
872 			    fmt[i + 2] == 's') {
873 				fmt_ptype = fmt[i + 1];
874 				i += 2;
875 				goto fmt_str;
876 			}
877 
878 			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
879 			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
880 			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
881 			    fmt[i + 1] == 'S') {
882 				/* just kernel pointers */
883 				if (tmp_buf)
884 					cur_arg = raw_args[num_spec];
885 				i++;
886 				goto nocopy_fmt;
887 			}
888 
889 			if (fmt[i + 1] == 'B') {
890 				if (tmp_buf)  {
891 					err = snprintf(tmp_buf,
892 						       (tmp_buf_end - tmp_buf),
893 						       "%pB",
894 						       (void *)(long)raw_args[num_spec]);
895 					tmp_buf += (err + 1);
896 				}
897 
898 				i++;
899 				num_spec++;
900 				continue;
901 			}
902 
903 			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
904 			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
905 			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
906 				err = -EINVAL;
907 				goto out;
908 			}
909 
910 			i += 2;
911 			if (!tmp_buf)
912 				goto nocopy_fmt;
913 
914 			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
915 			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
916 				err = -ENOSPC;
917 				goto out;
918 			}
919 
920 			unsafe_ptr = (char *)(long)raw_args[num_spec];
921 			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
922 						       sizeof_cur_ip);
923 			if (err < 0)
924 				memset(cur_ip, 0, sizeof_cur_ip);
925 
926 			/* hack: bstr_printf expects IP addresses to be
927 			 * pre-formatted as strings; ironically, the easiest way
928 			 * to do that is to call snprintf.
929 			 */
930 			ip_spec[2] = fmt[i - 1];
931 			ip_spec[3] = fmt[i];
932 			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
933 				       ip_spec, &cur_ip);
934 
935 			tmp_buf += err + 1;
936 			num_spec++;
937 
938 			continue;
939 		} else if (fmt[i] == 's') {
940 			fmt_ptype = fmt[i];
941 fmt_str:
942 			if (fmt[i + 1] != 0 &&
943 			    !isspace(fmt[i + 1]) &&
944 			    !ispunct(fmt[i + 1])) {
945 				err = -EINVAL;
946 				goto out;
947 			}
948 
949 			if (!tmp_buf)
950 				goto nocopy_fmt;
951 
952 			if (tmp_buf_end == tmp_buf) {
953 				err = -ENOSPC;
954 				goto out;
955 			}
956 
957 			unsafe_ptr = (char *)(long)raw_args[num_spec];
958 			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
959 						    fmt_ptype,
960 						    tmp_buf_end - tmp_buf);
961 			if (err < 0) {
962 				tmp_buf[0] = '\0';
963 				err = 1;
964 			}
965 
966 			tmp_buf += err;
967 			num_spec++;
968 
969 			continue;
970 		} else if (fmt[i] == 'c') {
971 			if (!tmp_buf)
972 				goto nocopy_fmt;
973 
974 			if (tmp_buf_end == tmp_buf) {
975 				err = -ENOSPC;
976 				goto out;
977 			}
978 
979 			*tmp_buf = raw_args[num_spec];
980 			tmp_buf++;
981 			num_spec++;
982 
983 			continue;
984 		}
985 
986 		sizeof_cur_arg = sizeof(int);
987 
988 		if (fmt[i] == 'l') {
989 			sizeof_cur_arg = sizeof(long);
990 			i++;
991 		}
992 		if (fmt[i] == 'l') {
993 			sizeof_cur_arg = sizeof(long long);
994 			i++;
995 		}
996 
997 		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
998 		    fmt[i] != 'x' && fmt[i] != 'X') {
999 			err = -EINVAL;
1000 			goto out;
1001 		}
1002 
1003 		if (tmp_buf)
1004 			cur_arg = raw_args[num_spec];
1005 nocopy_fmt:
1006 		if (tmp_buf) {
1007 			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
1008 			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
1009 				err = -ENOSPC;
1010 				goto out;
1011 			}
1012 
1013 			if (sizeof_cur_arg == 8) {
1014 				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
1015 				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
1016 			} else {
1017 				*(u32 *)tmp_buf = (u32)(long)cur_arg;
1018 			}
1019 			tmp_buf += sizeof_cur_arg;
1020 		}
1021 		num_spec++;
1022 	}
1023 
1024 	err = 0;
1025 out:
1026 	if (err)
1027 		bpf_bprintf_cleanup();
1028 	return err;
1029 }
1030 
1031 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
1032 	   const void *, data, u32, data_len)
1033 {
1034 	int err, num_args;
1035 	u32 *bin_args;
1036 
1037 	if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
1038 	    (data_len && !data))
1039 		return -EINVAL;
1040 	num_args = data_len / 8;
1041 
1042 	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
1043 	 * can safely give an unbounded size.
1044 	 */
1045 	err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
1046 	if (err < 0)
1047 		return err;
1048 
1049 	err = bstr_printf(str, str_size, fmt, bin_args);
1050 
1051 	bpf_bprintf_cleanup();
1052 
1053 	return err + 1;
1054 }
1055 
1056 const struct bpf_func_proto bpf_snprintf_proto = {
1057 	.func		= bpf_snprintf,
1058 	.gpl_only	= true,
1059 	.ret_type	= RET_INTEGER,
1060 	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
1061 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1062 	.arg3_type	= ARG_PTR_TO_CONST_STR,
1063 	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
1064 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
1065 };
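/* Illustrative sketch (not part of this file): on the BPF side the format
 * string must live in read-only memory (ARG_PTR_TO_CONST_STR) and the data
 * array holds one u64 per conversion specifier. libbpf also offers a
 * BPF_SNPRINTF() convenience macro that builds the u64 array. Names below
 * are hypothetical and 'comm' is assumed to be a NUL-terminated buffer.
 *
 *	static const char fmt[] = "pid=%d comm=%s";
 *	char out[64];
 *	__u64 args[2];
 *
 *	args[0] = bpf_get_current_pid_tgid() >> 32;
 *	args[1] = (__u64)(long)comm;
 *	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 */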
1066 
1067 /* BPF map elements can contain 'struct bpf_timer'.
1068  * Such a map owns all of its BPF timers.
1069  * 'struct bpf_timer' is allocated as part of map element allocation
1070  * and it's zero initialized.
1071  * That space is used to keep 'struct bpf_timer_kern'.
1072  * bpf_timer_init() allocates 'struct bpf_hrtimer', inits the hrtimer, and
1073  * remembers the 'struct bpf_map *' pointer it's part of.
1074  * bpf_timer_set_callback() increments the prog refcnt and assigns the bpf callback_fn.
1075  * bpf_timer_start() arms the timer.
1076  * If the user space reference to a map goes to zero at this point,
1077  * the ops->map_release_uref callback is responsible for cancelling the timers,
1078  * freeing their memory, and decrementing the progs' refcnts.
1079  * bpf_timer_cancel() cancels the timer and decrements the prog's refcnt.
1080  * Inner maps can contain bpf timers as well. ops->map_release_uref
1081  * frees the timers when an inner map is replaced or deleted by user space.
1082  */
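/* Illustrative sketch (not part of this file): the lifecycle above as seen
 * from a BPF program. 'timers' is assumed to be an array map whose value
 * type is struct elem; names and the 100ms period are hypothetical.
 *
 *	struct elem {
 *		struct bpf_timer t;
 *	};
 *
 *	static int timer_cb(void *map, int *key, struct elem *val)
 *	{
 *		bpf_timer_start(&val->t, 100000000, 0);     // re-arm in 100ms
 *		return 0;                                   // verifier requires 0
 *	}
 *
 *	SEC("tp/syscalls/sys_enter_getpid")
 *	int arm_timer(void *ctx)
 *	{
 *		int key = 0;
 *		struct elem *val;
 *
 *		val = bpf_map_lookup_elem(&timers, &key);
 *		if (!val)
 *			return 0;
 *		bpf_timer_init(&val->t, &timers, CLOCK_MONOTONIC);  // -EBUSY if already initialized
 *		bpf_timer_set_callback(&val->t, timer_cb);
 *		bpf_timer_start(&val->t, 100000000, 0);
 *		return 0;
 *	}
 */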
1083 struct bpf_hrtimer {
1084 	struct hrtimer timer;
1085 	struct bpf_map *map;
1086 	struct bpf_prog *prog;
1087 	void __rcu *callback_fn;
1088 	void *value;
1089 };
1090 
1091 /* the actual struct hidden inside uapi struct bpf_timer */
1092 struct bpf_timer_kern {
1093 	struct bpf_hrtimer *timer;
1094 	/* bpf_spin_lock is used here instead of spinlock_t to make
1095 	 * sure that it always fits into the space reserved by struct bpf_timer
1096 	 * regardless of LOCKDEP and spinlock debug flags.
1097 	 */
1098 	struct bpf_spin_lock lock;
1099 } __attribute__((aligned(8)));
1100 
1101 static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1102 
1103 static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1104 {
1105 	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1106 	struct bpf_map *map = t->map;
1107 	void *value = t->value;
1108 	bpf_callback_t callback_fn;
1109 	void *key;
1110 	u32 idx;
1111 
1112 	callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
1113 	if (!callback_fn)
1114 		goto out;
1115 
1116 	/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1117 	 * cannot be preempted by another bpf_timer_cb() on the same cpu.
1118 	 * Remember the timer this callback is servicing to prevent
1119 	 * deadlock if callback_fn() calls bpf_timer_cancel() or
1120 	 * bpf_map_delete_elem() on the same timer.
1121 	 */
1122 	this_cpu_write(hrtimer_running, t);
1123 	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1124 		struct bpf_array *array = container_of(map, struct bpf_array, map);
1125 
1126 		/* compute the key */
1127 		idx = ((char *)value - array->value) / array->elem_size;
1128 		key = &idx;
1129 	} else { /* hash or lru */
1130 		key = value - round_up(map->key_size, 8);
1131 	}
1132 
1133 	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
1134 	/* The verifier checked that return value is zero. */
1135 	/* The verifier checked that the return value is zero. */
1136 	this_cpu_write(hrtimer_running, NULL);
1137 out:
1138 	return HRTIMER_NORESTART;
1139 }
1140 
1141 BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
1142 	   u64, flags)
1143 {
1144 	clockid_t clockid = flags & (MAX_CLOCKS - 1);
1145 	struct bpf_hrtimer *t;
1146 	int ret = 0;
1147 
1148 	BUILD_BUG_ON(MAX_CLOCKS != 16);
1149 	BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
1150 	BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
1151 
1152 	if (in_nmi())
1153 		return -EOPNOTSUPP;
1154 
1155 	if (flags >= MAX_CLOCKS ||
1156 	    /* similar to timerfd except _ALARM variants are not supported */
1157 	    (clockid != CLOCK_MONOTONIC &&
1158 	     clockid != CLOCK_REALTIME &&
1159 	     clockid != CLOCK_BOOTTIME))
1160 		return -EINVAL;
1161 	__bpf_spin_lock_irqsave(&timer->lock);
1162 	t = timer->timer;
1163 	if (t) {
1164 		ret = -EBUSY;
1165 		goto out;
1166 	}
1167 	if (!atomic64_read(&map->usercnt)) {
1168 		/* maps with timers must be either held by user space
1169 		 * or pinned in bpffs.
1170 		 */
1171 		ret = -EPERM;
1172 		goto out;
1173 	}
1174 	/* allocate hrtimer via map_kmalloc to use memcg accounting */
1175 	t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
1176 	if (!t) {
1177 		ret = -ENOMEM;
1178 		goto out;
1179 	}
1180 	t->value = (void *)timer - map->timer_off;
1181 	t->map = map;
1182 	t->prog = NULL;
1183 	rcu_assign_pointer(t->callback_fn, NULL);
1184 	hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
1185 	t->timer.function = bpf_timer_cb;
1186 	timer->timer = t;
1187 out:
1188 	__bpf_spin_unlock_irqrestore(&timer->lock);
1189 	return ret;
1190 }
1191 
1192 static const struct bpf_func_proto bpf_timer_init_proto = {
1193 	.func		= bpf_timer_init,
1194 	.gpl_only	= true,
1195 	.ret_type	= RET_INTEGER,
1196 	.arg1_type	= ARG_PTR_TO_TIMER,
1197 	.arg2_type	= ARG_CONST_MAP_PTR,
1198 	.arg3_type	= ARG_ANYTHING,
1199 };
1200 
1201 BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
1202 	   struct bpf_prog_aux *, aux)
1203 {
1204 	struct bpf_prog *prev, *prog = aux->prog;
1205 	struct bpf_hrtimer *t;
1206 	int ret = 0;
1207 
1208 	if (in_nmi())
1209 		return -EOPNOTSUPP;
1210 	__bpf_spin_lock_irqsave(&timer->lock);
1211 	t = timer->timer;
1212 	if (!t) {
1213 		ret = -EINVAL;
1214 		goto out;
1215 	}
1216 	if (!atomic64_read(&t->map->usercnt)) {
1217 		/* maps with timers must be either held by user space
1218 		 * or pinned in bpffs. Otherwise timer might still be
1219 		 * running even when bpf prog is detached and user space
1220 		 * is gone, since map_release_uref won't ever be called.
1221 		 */
1222 		ret = -EPERM;
1223 		goto out;
1224 	}
1225 	prev = t->prog;
1226 	if (prev != prog) {
1227 		/* Bump prog refcnt once. Every bpf_timer_set_callback()
1228 		 * can pick different callback_fn-s within the same prog.
1229 		 */
1230 		prog = bpf_prog_inc_not_zero(prog);
1231 		if (IS_ERR(prog)) {
1232 			ret = PTR_ERR(prog);
1233 			goto out;
1234 		}
1235 		if (prev)
1236 			/* Drop prev prog refcnt when swapping with new prog */
1237 			bpf_prog_put(prev);
1238 		t->prog = prog;
1239 	}
1240 	rcu_assign_pointer(t->callback_fn, callback_fn);
1241 out:
1242 	__bpf_spin_unlock_irqrestore(&timer->lock);
1243 	return ret;
1244 }
1245 
1246 static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1247 	.func		= bpf_timer_set_callback,
1248 	.gpl_only	= true,
1249 	.ret_type	= RET_INTEGER,
1250 	.arg1_type	= ARG_PTR_TO_TIMER,
1251 	.arg2_type	= ARG_PTR_TO_FUNC,
1252 };
1253 
1254 BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
1255 {
1256 	struct bpf_hrtimer *t;
1257 	int ret = 0;
1258 
1259 	if (in_nmi())
1260 		return -EOPNOTSUPP;
1261 	if (flags)
1262 		return -EINVAL;
1263 	__bpf_spin_lock_irqsave(&timer->lock);
1264 	t = timer->timer;
1265 	if (!t || !t->prog) {
1266 		ret = -EINVAL;
1267 		goto out;
1268 	}
1269 	hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
1270 out:
1271 	__bpf_spin_unlock_irqrestore(&timer->lock);
1272 	return ret;
1273 }
1274 
1275 static const struct bpf_func_proto bpf_timer_start_proto = {
1276 	.func		= bpf_timer_start,
1277 	.gpl_only	= true,
1278 	.ret_type	= RET_INTEGER,
1279 	.arg1_type	= ARG_PTR_TO_TIMER,
1280 	.arg2_type	= ARG_ANYTHING,
1281 	.arg3_type	= ARG_ANYTHING,
1282 };
1283 
1284 static void drop_prog_refcnt(struct bpf_hrtimer *t)
1285 {
1286 	struct bpf_prog *prog = t->prog;
1287 
1288 	if (prog) {
1289 		bpf_prog_put(prog);
1290 		t->prog = NULL;
1291 		rcu_assign_pointer(t->callback_fn, NULL);
1292 	}
1293 }
1294 
1295 BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
1296 {
1297 	struct bpf_hrtimer *t;
1298 	int ret = 0;
1299 
1300 	if (in_nmi())
1301 		return -EOPNOTSUPP;
1302 	__bpf_spin_lock_irqsave(&timer->lock);
1303 	t = timer->timer;
1304 	if (!t) {
1305 		ret = -EINVAL;
1306 		goto out;
1307 	}
1308 	if (this_cpu_read(hrtimer_running) == t) {
1309 		/* If the bpf callback_fn is trying to bpf_timer_cancel()
1310 		 * its own timer, hrtimer_cancel() will deadlock
1311 		 * since it waits for callback_fn to finish.
1312 		 */
1313 		ret = -EDEADLK;
1314 		goto out;
1315 	}
1316 	drop_prog_refcnt(t);
1317 out:
1318 	__bpf_spin_unlock_irqrestore(&timer->lock);
1319 	/* Cancel the timer and wait for associated callback to finish
1320 	 * if it was running.
1321 	 */
1322 	ret = ret ?: hrtimer_cancel(&t->timer);
1323 	return ret;
1324 }
1325 
1326 static const struct bpf_func_proto bpf_timer_cancel_proto = {
1327 	.func		= bpf_timer_cancel,
1328 	.gpl_only	= true,
1329 	.ret_type	= RET_INTEGER,
1330 	.arg1_type	= ARG_PTR_TO_TIMER,
1331 };
1332 
1333 /* This function is called by map_delete/update_elem for an individual element and
1334  * by ops->map_release_uref when the user space reference to a map reaches zero.
1335  */
1336 void bpf_timer_cancel_and_free(void *val)
1337 {
1338 	struct bpf_timer_kern *timer = val;
1339 	struct bpf_hrtimer *t;
1340 
1341 	/* Performance optimization: read timer->timer without lock first. */
1342 	if (!READ_ONCE(timer->timer))
1343 		return;
1344 
1345 	__bpf_spin_lock_irqsave(&timer->lock);
1346 	/* re-read it under lock */
1347 	t = timer->timer;
1348 	if (!t)
1349 		goto out;
1350 	drop_prog_refcnt(t);
1351 	/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1352 	 * this timer, since it won't be initialized.
1353 	 */
1354 	timer->timer = NULL;
1355 out:
1356 	__bpf_spin_unlock_irqrestore(&timer->lock);
1357 	if (!t)
1358 		return;
1359 	/* Cancel the timer and wait for callback to complete if it was running.
1360 	 * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
1361 	 * right after for both preallocated and non-preallocated maps.
1362 	 * The timer->timer = NULL was already done and no code path can
1363 	 * see address 't' anymore.
1364 	 *
1365 	 * Check that bpf_map_delete/update_elem() wasn't called from timer
1366  * callback_fn. In such a case don't call hrtimer_cancel() (since it will
1367  * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1368  * return -1). Though callback_fn is still running on this cpu, it's
1369 	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1370 	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
1371 	 * since timer->timer = NULL was already done. The timer will be
1372 	 * effectively cancelled because bpf_timer_cb() will return
1373 	 * HRTIMER_NORESTART.
1374 	 */
1375 	if (this_cpu_read(hrtimer_running) != t)
1376 		hrtimer_cancel(&t->timer);
1377 	kfree(t);
1378 }
1379 
1380 const struct bpf_func_proto bpf_get_current_task_proto __weak;
1381 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
1382 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1383 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1384 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1385 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1386 const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
1387 
1388 const struct bpf_func_proto *
1389 bpf_base_func_proto(enum bpf_func_id func_id)
1390 {
1391 	switch (func_id) {
1392 	case BPF_FUNC_map_lookup_elem:
1393 		return &bpf_map_lookup_elem_proto;
1394 	case BPF_FUNC_map_update_elem:
1395 		return &bpf_map_update_elem_proto;
1396 	case BPF_FUNC_map_delete_elem:
1397 		return &bpf_map_delete_elem_proto;
1398 	case BPF_FUNC_map_push_elem:
1399 		return &bpf_map_push_elem_proto;
1400 	case BPF_FUNC_map_pop_elem:
1401 		return &bpf_map_pop_elem_proto;
1402 	case BPF_FUNC_map_peek_elem:
1403 		return &bpf_map_peek_elem_proto;
1404 	case BPF_FUNC_get_prandom_u32:
1405 		return &bpf_get_prandom_u32_proto;
1406 	case BPF_FUNC_get_smp_processor_id:
1407 		return &bpf_get_raw_smp_processor_id_proto;
1408 	case BPF_FUNC_get_numa_node_id:
1409 		return &bpf_get_numa_node_id_proto;
1410 	case BPF_FUNC_tail_call:
1411 		return &bpf_tail_call_proto;
1412 	case BPF_FUNC_ktime_get_ns:
1413 		return &bpf_ktime_get_ns_proto;
1414 	case BPF_FUNC_ktime_get_boot_ns:
1415 		return &bpf_ktime_get_boot_ns_proto;
1416 	case BPF_FUNC_ringbuf_output:
1417 		return &bpf_ringbuf_output_proto;
1418 	case BPF_FUNC_ringbuf_reserve:
1419 		return &bpf_ringbuf_reserve_proto;
1420 	case BPF_FUNC_ringbuf_submit:
1421 		return &bpf_ringbuf_submit_proto;
1422 	case BPF_FUNC_ringbuf_discard:
1423 		return &bpf_ringbuf_discard_proto;
1424 	case BPF_FUNC_ringbuf_query:
1425 		return &bpf_ringbuf_query_proto;
1426 	case BPF_FUNC_for_each_map_elem:
1427 		return &bpf_for_each_map_elem_proto;
1428 	case BPF_FUNC_loop:
1429 		return &bpf_loop_proto;
1430 	case BPF_FUNC_strncmp:
1431 		return &bpf_strncmp_proto;
1432 	default:
1433 		break;
1434 	}
1435 
1436 	if (!bpf_capable())
1437 		return NULL;
1438 
1439 	switch (func_id) {
1440 	case BPF_FUNC_spin_lock:
1441 		return &bpf_spin_lock_proto;
1442 	case BPF_FUNC_spin_unlock:
1443 		return &bpf_spin_unlock_proto;
1444 	case BPF_FUNC_jiffies64:
1445 		return &bpf_jiffies64_proto;
1446 	case BPF_FUNC_per_cpu_ptr:
1447 		return &bpf_per_cpu_ptr_proto;
1448 	case BPF_FUNC_this_cpu_ptr:
1449 		return &bpf_this_cpu_ptr_proto;
1450 	case BPF_FUNC_timer_init:
1451 		return &bpf_timer_init_proto;
1452 	case BPF_FUNC_timer_set_callback:
1453 		return &bpf_timer_set_callback_proto;
1454 	case BPF_FUNC_timer_start:
1455 		return &bpf_timer_start_proto;
1456 	case BPF_FUNC_timer_cancel:
1457 		return &bpf_timer_cancel_proto;
1458 	default:
1459 		break;
1460 	}
1461 
1462 	if (!perfmon_capable())
1463 		return NULL;
1464 
1465 	switch (func_id) {
1466 	case BPF_FUNC_trace_printk:
1467 		return bpf_get_trace_printk_proto();
1468 	case BPF_FUNC_get_current_task:
1469 		return &bpf_get_current_task_proto;
1470 	case BPF_FUNC_get_current_task_btf:
1471 		return &bpf_get_current_task_btf_proto;
1472 	case BPF_FUNC_probe_read_user:
1473 		return &bpf_probe_read_user_proto;
1474 	case BPF_FUNC_probe_read_kernel:
1475 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1476 		       NULL : &bpf_probe_read_kernel_proto;
1477 	case BPF_FUNC_probe_read_user_str:
1478 		return &bpf_probe_read_user_str_proto;
1479 	case BPF_FUNC_probe_read_kernel_str:
1480 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1481 		       NULL : &bpf_probe_read_kernel_str_proto;
1482 	case BPF_FUNC_snprintf_btf:
1483 		return &bpf_snprintf_btf_proto;
1484 	case BPF_FUNC_snprintf:
1485 		return &bpf_snprintf_proto;
1486 	case BPF_FUNC_task_pt_regs:
1487 		return &bpf_task_pt_regs_proto;
1488 	case BPF_FUNC_trace_vprintk:
1489 		return bpf_get_trace_vprintk_proto();
1490 	default:
1491 		return NULL;
1492 	}
1493 }
1494